module @module {
  util.global private @__auto.token_embd.weight = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xf16>
  util.global private @__auto.blk.0.attn_norm.weight = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.attn_q.weight = #stream.parameter.named<"model"::"blk.0.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.0.attn_k.weight = #stream.parameter.named<"model"::"blk.0.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.0.attn_v.weight = #stream.parameter.named<"model"::"blk.0.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.0.attn_output.weight = #stream.parameter.named<"model"::"blk.0.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.0.ffn_norm.weight = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.ffn_gate.weight = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.0.ffn_up.weight = #stream.parameter.named<"model"::"blk.0.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.0.ffn_down.weight = #stream.parameter.named<"model"::"blk.0.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.1.attn_norm.weight = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.attn_q.weight = #stream.parameter.named<"model"::"blk.1.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.1.attn_k.weight = #stream.parameter.named<"model"::"blk.1.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.1.attn_v.weight = #stream.parameter.named<"model"::"blk.1.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.1.attn_output.weight = #stream.parameter.named<"model"::"blk.1.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.1.ffn_norm.weight = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.ffn_gate.weight = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.1.ffn_up.weight = #stream.parameter.named<"model"::"blk.1.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.1.ffn_down.weight = #stream.parameter.named<"model"::"blk.1.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.2.attn_norm.weight = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.attn_q.weight = #stream.parameter.named<"model"::"blk.2.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.2.attn_k.weight = #stream.parameter.named<"model"::"blk.2.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.2.attn_v.weight = #stream.parameter.named<"model"::"blk.2.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.2.attn_output.weight = #stream.parameter.named<"model"::"blk.2.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.2.ffn_norm.weight = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.ffn_gate.weight = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.2.ffn_up.weight = #stream.parameter.named<"model"::"blk.2.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.2.ffn_down.weight = #stream.parameter.named<"model"::"blk.2.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.3.attn_norm.weight = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.attn_q.weight = #stream.parameter.named<"model"::"blk.3.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.3.attn_k.weight = #stream.parameter.named<"model"::"blk.3.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.3.attn_v.weight = #stream.parameter.named<"model"::"blk.3.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.3.attn_output.weight = #stream.parameter.named<"model"::"blk.3.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.3.ffn_norm.weight = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.ffn_gate.weight = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.3.ffn_up.weight = #stream.parameter.named<"model"::"blk.3.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.3.ffn_down.weight = #stream.parameter.named<"model"::"blk.3.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.4.attn_norm.weight = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.attn_q.weight = #stream.parameter.named<"model"::"blk.4.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.4.attn_k.weight = #stream.parameter.named<"model"::"blk.4.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.4.attn_v.weight = #stream.parameter.named<"model"::"blk.4.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.4.attn_output.weight = #stream.parameter.named<"model"::"blk.4.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.4.ffn_norm.weight = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.ffn_gate.weight = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.4.ffn_up.weight = #stream.parameter.named<"model"::"blk.4.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.4.ffn_down.weight = #stream.parameter.named<"model"::"blk.4.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.5.attn_norm.weight = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.attn_q.weight = #stream.parameter.named<"model"::"blk.5.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.5.attn_k.weight = #stream.parameter.named<"model"::"blk.5.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.5.attn_v.weight = #stream.parameter.named<"model"::"blk.5.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.5.attn_output.weight = #stream.parameter.named<"model"::"blk.5.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.5.ffn_norm.weight = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.ffn_gate.weight = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.5.ffn_up.weight = #stream.parameter.named<"model"::"blk.5.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.5.ffn_down.weight = #stream.parameter.named<"model"::"blk.5.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.6.attn_norm.weight = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.attn_q.weight = #stream.parameter.named<"model"::"blk.6.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.6.attn_k.weight = #stream.parameter.named<"model"::"blk.6.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.6.attn_v.weight = #stream.parameter.named<"model"::"blk.6.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.6.attn_output.weight = #stream.parameter.named<"model"::"blk.6.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.6.ffn_norm.weight = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.ffn_gate.weight = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.6.ffn_up.weight = #stream.parameter.named<"model"::"blk.6.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.6.ffn_down.weight = #stream.parameter.named<"model"::"blk.6.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.7.attn_norm.weight = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.attn_q.weight = #stream.parameter.named<"model"::"blk.7.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.7.attn_k.weight = #stream.parameter.named<"model"::"blk.7.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.7.attn_v.weight = #stream.parameter.named<"model"::"blk.7.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.7.attn_output.weight = #stream.parameter.named<"model"::"blk.7.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.7.ffn_norm.weight = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.ffn_gate.weight = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.7.ffn_up.weight = #stream.parameter.named<"model"::"blk.7.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.7.ffn_down.weight = #stream.parameter.named<"model"::"blk.7.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.8.attn_norm.weight = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.attn_q.weight = #stream.parameter.named<"model"::"blk.8.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.8.attn_k.weight = #stream.parameter.named<"model"::"blk.8.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.8.attn_v.weight = #stream.parameter.named<"model"::"blk.8.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.8.attn_output.weight = #stream.parameter.named<"model"::"blk.8.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.8.ffn_norm.weight = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.ffn_gate.weight = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.8.ffn_up.weight = #stream.parameter.named<"model"::"blk.8.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.8.ffn_down.weight = #stream.parameter.named<"model"::"blk.8.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.9.attn_norm.weight = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.attn_q.weight = #stream.parameter.named<"model"::"blk.9.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.9.attn_k.weight = #stream.parameter.named<"model"::"blk.9.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.9.attn_v.weight = #stream.parameter.named<"model"::"blk.9.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.9.attn_output.weight = #stream.parameter.named<"model"::"blk.9.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.9.ffn_norm.weight = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.ffn_gate.weight = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.9.ffn_up.weight = #stream.parameter.named<"model"::"blk.9.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.9.ffn_down.weight = #stream.parameter.named<"model"::"blk.9.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.10.attn_norm.weight = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.attn_q.weight = #stream.parameter.named<"model"::"blk.10.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.10.attn_k.weight = #stream.parameter.named<"model"::"blk.10.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.10.attn_v.weight = #stream.parameter.named<"model"::"blk.10.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.10.attn_output.weight = #stream.parameter.named<"model"::"blk.10.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.10.ffn_norm.weight = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.ffn_gate.weight = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.10.ffn_up.weight = #stream.parameter.named<"model"::"blk.10.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.10.ffn_down.weight = #stream.parameter.named<"model"::"blk.10.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.11.attn_norm.weight = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.attn_q.weight = #stream.parameter.named<"model"::"blk.11.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.11.attn_k.weight = #stream.parameter.named<"model"::"blk.11.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.11.attn_v.weight = #stream.parameter.named<"model"::"blk.11.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.11.attn_output.weight = #stream.parameter.named<"model"::"blk.11.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.11.ffn_norm.weight = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.ffn_gate.weight = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.11.ffn_up.weight = #stream.parameter.named<"model"::"blk.11.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.11.ffn_down.weight = #stream.parameter.named<"model"::"blk.11.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.12.attn_norm.weight = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.attn_q.weight = #stream.parameter.named<"model"::"blk.12.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.12.attn_k.weight = #stream.parameter.named<"model"::"blk.12.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.12.attn_v.weight = #stream.parameter.named<"model"::"blk.12.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.12.attn_output.weight = #stream.parameter.named<"model"::"blk.12.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.12.ffn_norm.weight = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.ffn_gate.weight = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.12.ffn_up.weight = #stream.parameter.named<"model"::"blk.12.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.12.ffn_down.weight = #stream.parameter.named<"model"::"blk.12.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.13.attn_norm.weight = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.attn_q.weight = #stream.parameter.named<"model"::"blk.13.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.13.attn_k.weight = #stream.parameter.named<"model"::"blk.13.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.13.attn_v.weight = #stream.parameter.named<"model"::"blk.13.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.13.attn_output.weight = #stream.parameter.named<"model"::"blk.13.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.13.ffn_norm.weight = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.ffn_gate.weight = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.13.ffn_up.weight = #stream.parameter.named<"model"::"blk.13.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.13.ffn_down.weight = #stream.parameter.named<"model"::"blk.13.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.14.attn_norm.weight = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.attn_q.weight = #stream.parameter.named<"model"::"blk.14.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.14.attn_k.weight = #stream.parameter.named<"model"::"blk.14.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.14.attn_v.weight = #stream.parameter.named<"model"::"blk.14.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.14.attn_output.weight = #stream.parameter.named<"model"::"blk.14.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.14.ffn_norm.weight = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.ffn_gate.weight = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.14.ffn_up.weight = #stream.parameter.named<"model"::"blk.14.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.14.ffn_down.weight = #stream.parameter.named<"model"::"blk.14.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.15.attn_norm.weight = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.attn_q.weight = #stream.parameter.named<"model"::"blk.15.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.15.attn_k.weight = #stream.parameter.named<"model"::"blk.15.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.15.attn_v.weight = #stream.parameter.named<"model"::"blk.15.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.15.attn_output.weight = #stream.parameter.named<"model"::"blk.15.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.15.ffn_norm.weight = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.ffn_gate.weight = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.15.ffn_up.weight = #stream.parameter.named<"model"::"blk.15.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.15.ffn_down.weight = #stream.parameter.named<"model"::"blk.15.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.16.attn_norm.weight = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.attn_q.weight = #stream.parameter.named<"model"::"blk.16.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.16.attn_k.weight = #stream.parameter.named<"model"::"blk.16.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.16.attn_v.weight = #stream.parameter.named<"model"::"blk.16.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.16.attn_output.weight = #stream.parameter.named<"model"::"blk.16.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.16.ffn_norm.weight = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.ffn_gate.weight = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.16.ffn_up.weight = #stream.parameter.named<"model"::"blk.16.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.16.ffn_down.weight = #stream.parameter.named<"model"::"blk.16.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.17.attn_norm.weight = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.attn_q.weight = #stream.parameter.named<"model"::"blk.17.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.17.attn_k.weight = #stream.parameter.named<"model"::"blk.17.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.17.attn_v.weight = #stream.parameter.named<"model"::"blk.17.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.17.attn_output.weight = #stream.parameter.named<"model"::"blk.17.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.17.ffn_norm.weight = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.ffn_gate.weight = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.17.ffn_up.weight = #stream.parameter.named<"model"::"blk.17.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.17.ffn_down.weight = #stream.parameter.named<"model"::"blk.17.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.18.attn_norm.weight = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.attn_q.weight = #stream.parameter.named<"model"::"blk.18.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.18.attn_k.weight = #stream.parameter.named<"model"::"blk.18.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.18.attn_v.weight = #stream.parameter.named<"model"::"blk.18.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.18.attn_output.weight = #stream.parameter.named<"model"::"blk.18.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.18.ffn_norm.weight = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.ffn_gate.weight = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.18.ffn_up.weight = #stream.parameter.named<"model"::"blk.18.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.18.ffn_down.weight = #stream.parameter.named<"model"::"blk.18.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.19.attn_norm.weight = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.attn_q.weight = #stream.parameter.named<"model"::"blk.19.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.19.attn_k.weight = #stream.parameter.named<"model"::"blk.19.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.19.attn_v.weight = #stream.parameter.named<"model"::"blk.19.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.19.attn_output.weight = #stream.parameter.named<"model"::"blk.19.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.19.ffn_norm.weight = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.ffn_gate.weight = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.19.ffn_up.weight = #stream.parameter.named<"model"::"blk.19.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.19.ffn_down.weight = #stream.parameter.named<"model"::"blk.19.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.20.attn_norm.weight = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.attn_q.weight = #stream.parameter.named<"model"::"blk.20.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.20.attn_k.weight = #stream.parameter.named<"model"::"blk.20.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.20.attn_v.weight = #stream.parameter.named<"model"::"blk.20.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.20.attn_output.weight = #stream.parameter.named<"model"::"blk.20.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.20.ffn_norm.weight = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.ffn_gate.weight = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.20.ffn_up.weight = #stream.parameter.named<"model"::"blk.20.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.20.ffn_down.weight = #stream.parameter.named<"model"::"blk.20.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.21.attn_norm.weight = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.attn_q.weight = #stream.parameter.named<"model"::"blk.21.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.21.attn_k.weight = #stream.parameter.named<"model"::"blk.21.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.21.attn_v.weight = #stream.parameter.named<"model"::"blk.21.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.21.attn_output.weight = #stream.parameter.named<"model"::"blk.21.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.21.ffn_norm.weight = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.ffn_gate.weight = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.21.ffn_up.weight = #stream.parameter.named<"model"::"blk.21.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.21.ffn_down.weight = #stream.parameter.named<"model"::"blk.21.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.22.attn_norm.weight = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.attn_q.weight = #stream.parameter.named<"model"::"blk.22.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.22.attn_k.weight = #stream.parameter.named<"model"::"blk.22.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.22.attn_v.weight = #stream.parameter.named<"model"::"blk.22.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.22.attn_output.weight = #stream.parameter.named<"model"::"blk.22.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.22.ffn_norm.weight = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.ffn_gate.weight = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.22.ffn_up.weight = #stream.parameter.named<"model"::"blk.22.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.22.ffn_down.weight = #stream.parameter.named<"model"::"blk.22.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.23.attn_norm.weight = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.attn_q.weight = #stream.parameter.named<"model"::"blk.23.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.23.attn_k.weight = #stream.parameter.named<"model"::"blk.23.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.23.attn_v.weight = #stream.parameter.named<"model"::"blk.23.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.23.attn_output.weight = #stream.parameter.named<"model"::"blk.23.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.23.ffn_norm.weight = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.ffn_gate.weight = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.23.ffn_up.weight = #stream.parameter.named<"model"::"blk.23.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.23.ffn_down.weight = #stream.parameter.named<"model"::"blk.23.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.24.attn_norm.weight = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.attn_q.weight = #stream.parameter.named<"model"::"blk.24.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.24.attn_k.weight = #stream.parameter.named<"model"::"blk.24.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.24.attn_v.weight = #stream.parameter.named<"model"::"blk.24.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.24.attn_output.weight = #stream.parameter.named<"model"::"blk.24.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.24.ffn_norm.weight = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.ffn_gate.weight = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.24.ffn_up.weight = #stream.parameter.named<"model"::"blk.24.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.24.ffn_down.weight = #stream.parameter.named<"model"::"blk.24.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.25.attn_norm.weight = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.attn_q.weight = #stream.parameter.named<"model"::"blk.25.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.25.attn_k.weight = #stream.parameter.named<"model"::"blk.25.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.25.attn_v.weight = #stream.parameter.named<"model"::"blk.25.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.25.attn_output.weight = #stream.parameter.named<"model"::"blk.25.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.25.ffn_norm.weight = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.ffn_gate.weight = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.25.ffn_up.weight = #stream.parameter.named<"model"::"blk.25.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.25.ffn_down.weight = #stream.parameter.named<"model"::"blk.25.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.26.attn_norm.weight = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.attn_q.weight = #stream.parameter.named<"model"::"blk.26.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.26.attn_k.weight = #stream.parameter.named<"model"::"blk.26.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.26.attn_v.weight = #stream.parameter.named<"model"::"blk.26.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.26.attn_output.weight = #stream.parameter.named<"model"::"blk.26.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.26.ffn_norm.weight = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.ffn_gate.weight = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.26.ffn_up.weight = #stream.parameter.named<"model"::"blk.26.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.26.ffn_down.weight = #stream.parameter.named<"model"::"blk.26.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.27.attn_norm.weight = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.attn_q.weight = #stream.parameter.named<"model"::"blk.27.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.27.attn_k.weight = #stream.parameter.named<"model"::"blk.27.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.27.attn_v.weight = #stream.parameter.named<"model"::"blk.27.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.27.attn_output.weight = #stream.parameter.named<"model"::"blk.27.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.27.ffn_norm.weight = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.ffn_gate.weight = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.27.ffn_up.weight = #stream.parameter.named<"model"::"blk.27.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.27.ffn_down.weight = #stream.parameter.named<"model"::"blk.27.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.28.attn_norm.weight = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.attn_q.weight = #stream.parameter.named<"model"::"blk.28.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.28.attn_k.weight = #stream.parameter.named<"model"::"blk.28.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.28.attn_v.weight = #stream.parameter.named<"model"::"blk.28.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.28.attn_output.weight = #stream.parameter.named<"model"::"blk.28.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.28.ffn_norm.weight = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.ffn_gate.weight = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.28.ffn_up.weight = #stream.parameter.named<"model"::"blk.28.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.28.ffn_down.weight = #stream.parameter.named<"model"::"blk.28.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.29.attn_norm.weight = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.attn_q.weight = #stream.parameter.named<"model"::"blk.29.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.29.attn_k.weight = #stream.parameter.named<"model"::"blk.29.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.29.attn_v.weight = #stream.parameter.named<"model"::"blk.29.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.29.attn_output.weight = #stream.parameter.named<"model"::"blk.29.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.29.ffn_norm.weight = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.ffn_gate.weight = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.29.ffn_up.weight = #stream.parameter.named<"model"::"blk.29.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.29.ffn_down.weight = #stream.parameter.named<"model"::"blk.29.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.30.attn_norm.weight = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.attn_q.weight = #stream.parameter.named<"model"::"blk.30.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.30.attn_k.weight = #stream.parameter.named<"model"::"blk.30.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.30.attn_v.weight = #stream.parameter.named<"model"::"blk.30.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.30.attn_output.weight = #stream.parameter.named<"model"::"blk.30.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.30.ffn_norm.weight = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.ffn_gate.weight = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.30.ffn_up.weight = #stream.parameter.named<"model"::"blk.30.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.30.ffn_down.weight = #stream.parameter.named<"model"::"blk.30.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.blk.31.attn_norm.weight = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.attn_q.weight = #stream.parameter.named<"model"::"blk.31.attn_q.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.31.attn_k.weight = #stream.parameter.named<"model"::"blk.31.attn_k.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.31.attn_v.weight = #stream.parameter.named<"model"::"blk.31.attn_v.weight"> : tensor<1024x4096xf16>
  util.global private @__auto.blk.31.attn_output.weight = #stream.parameter.named<"model"::"blk.31.attn_output.weight"> : tensor<4096x4096xf16>
  util.global private @__auto.blk.31.ffn_norm.weight = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.ffn_gate.weight = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.31.ffn_up.weight = #stream.parameter.named<"model"::"blk.31.ffn_up.weight"> : tensor<14336x4096xf16>
  util.global private @__auto.blk.31.ffn_down.weight = #stream.parameter.named<"model"::"blk.31.ffn_down.weight"> : tensor<4096x14336xf16>
  util.global private @__auto.output_norm.weight = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.output.weight = #stream.parameter.named<"model"::"output.weight"> : tensor<128256x4096xf16>
  func.func @prefill_bs4(%arg0: !torch.vtensor<[4,?],si64>, %arg1: !torch.vtensor<[4],si64>, %arg2: !torch.vtensor<[4,?],si64>, %arg3: !torch.tensor<[?,2097152],f16>) -> !torch.vtensor<[4,?,128256],f16> attributes {torch.assume_strict_symbolic_shapes} {
    %__auto.token_embd.weight = util.global.load @__auto.token_embd.weight : tensor<128256x4096xf16>
    %0 = torch_c.from_builtin_tensor %__auto.token_embd.weight : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16>
    %__auto.blk.0.attn_norm.weight = util.global.load @__auto.blk.0.attn_norm.weight : tensor<4096xf32>
    %1 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.attn_q.weight = util.global.load @__auto.blk.0.attn_q.weight : tensor<4096x4096xf16>
    %2 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.0.attn_k.weight = util.global.load @__auto.blk.0.attn_k.weight : tensor<1024x4096xf16>
    %3 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.0.attn_v.weight = util.global.load @__auto.blk.0.attn_v.weight : tensor<1024x4096xf16>
    %4 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.0.attn_output.weight = util.global.load @__auto.blk.0.attn_output.weight : tensor<4096x4096xf16>
    %5 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.0.ffn_norm.weight = util.global.load @__auto.blk.0.ffn_norm.weight : tensor<4096xf32>
    %6 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.ffn_gate.weight = util.global.load @__auto.blk.0.ffn_gate.weight : tensor<14336x4096xf16>
    %7 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.0.ffn_up.weight = util.global.load @__auto.blk.0.ffn_up.weight : tensor<14336x4096xf16>
    %8 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.0.ffn_down.weight = util.global.load @__auto.blk.0.ffn_down.weight : tensor<4096x14336xf16>
    %9 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.1.attn_norm.weight = util.global.load @__auto.blk.1.attn_norm.weight : tensor<4096xf32>
    %10 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.attn_q.weight = util.global.load @__auto.blk.1.attn_q.weight : tensor<4096x4096xf16>
    %11 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.1.attn_k.weight = util.global.load @__auto.blk.1.attn_k.weight : tensor<1024x4096xf16>
    %12 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.1.attn_v.weight = util.global.load @__auto.blk.1.attn_v.weight : tensor<1024x4096xf16>
    %13 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.1.attn_output.weight = util.global.load @__auto.blk.1.attn_output.weight : tensor<4096x4096xf16>
    %14 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.1.ffn_norm.weight = util.global.load @__auto.blk.1.ffn_norm.weight : tensor<4096xf32>
    %15 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.ffn_gate.weight = util.global.load @__auto.blk.1.ffn_gate.weight : tensor<14336x4096xf16>
    %16 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.1.ffn_up.weight = util.global.load @__auto.blk.1.ffn_up.weight : tensor<14336x4096xf16>
    %17 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.1.ffn_down.weight = util.global.load @__auto.blk.1.ffn_down.weight : tensor<4096x14336xf16>
    %18 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.2.attn_norm.weight = util.global.load @__auto.blk.2.attn_norm.weight : tensor<4096xf32>
    %19 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.attn_q.weight = util.global.load @__auto.blk.2.attn_q.weight : tensor<4096x4096xf16>
    %20 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.2.attn_k.weight = util.global.load @__auto.blk.2.attn_k.weight : tensor<1024x4096xf16>
    %21 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.2.attn_v.weight = util.global.load @__auto.blk.2.attn_v.weight : tensor<1024x4096xf16>
    %22 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.2.attn_output.weight = util.global.load @__auto.blk.2.attn_output.weight : tensor<4096x4096xf16>
    %23 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.2.ffn_norm.weight = util.global.load @__auto.blk.2.ffn_norm.weight : tensor<4096xf32>
    %24 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.ffn_gate.weight = util.global.load @__auto.blk.2.ffn_gate.weight : tensor<14336x4096xf16>
    %25 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.2.ffn_up.weight = util.global.load @__auto.blk.2.ffn_up.weight : tensor<14336x4096xf16>
    %26 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.2.ffn_down.weight = util.global.load @__auto.blk.2.ffn_down.weight : tensor<4096x14336xf16>
    %27 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.3.attn_norm.weight = util.global.load @__auto.blk.3.attn_norm.weight : tensor<4096xf32>
    %28 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.attn_q.weight = util.global.load @__auto.blk.3.attn_q.weight : tensor<4096x4096xf16>
    %29 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.3.attn_k.weight = util.global.load @__auto.blk.3.attn_k.weight : tensor<1024x4096xf16>
    %30 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.3.attn_v.weight = util.global.load @__auto.blk.3.attn_v.weight : tensor<1024x4096xf16>
    %31 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.3.attn_output.weight = util.global.load @__auto.blk.3.attn_output.weight : tensor<4096x4096xf16>
    %32 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.3.ffn_norm.weight = util.global.load @__auto.blk.3.ffn_norm.weight : tensor<4096xf32>
    %33 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.ffn_gate.weight = util.global.load @__auto.blk.3.ffn_gate.weight : tensor<14336x4096xf16>
    %34 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.3.ffn_up.weight = util.global.load @__auto.blk.3.ffn_up.weight : tensor<14336x4096xf16>
    %35 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.3.ffn_down.weight = util.global.load @__auto.blk.3.ffn_down.weight : tensor<4096x14336xf16>
    %36 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.4.attn_norm.weight = util.global.load @__auto.blk.4.attn_norm.weight : tensor<4096xf32>
    %37 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.attn_q.weight = util.global.load @__auto.blk.4.attn_q.weight : tensor<4096x4096xf16>
    %38 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.4.attn_k.weight = util.global.load @__auto.blk.4.attn_k.weight : tensor<1024x4096xf16>
    %39 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.4.attn_v.weight = util.global.load @__auto.blk.4.attn_v.weight : tensor<1024x4096xf16>
    %40 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.4.attn_output.weight = util.global.load @__auto.blk.4.attn_output.weight : tensor<4096x4096xf16>
    %41 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.4.ffn_norm.weight = util.global.load @__auto.blk.4.ffn_norm.weight : tensor<4096xf32>
    %42 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.ffn_gate.weight = util.global.load @__auto.blk.4.ffn_gate.weight : tensor<14336x4096xf16>
    %43 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.4.ffn_up.weight = util.global.load @__auto.blk.4.ffn_up.weight : tensor<14336x4096xf16>
    %44 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.4.ffn_down.weight = util.global.load @__auto.blk.4.ffn_down.weight : tensor<4096x14336xf16>
    %45 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.5.attn_norm.weight = util.global.load @__auto.blk.5.attn_norm.weight : tensor<4096xf32>
    %46 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.attn_q.weight = util.global.load @__auto.blk.5.attn_q.weight : tensor<4096x4096xf16>
    %47 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.5.attn_k.weight = util.global.load @__auto.blk.5.attn_k.weight : tensor<1024x4096xf16>
    %48 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.5.attn_v.weight = util.global.load @__auto.blk.5.attn_v.weight : tensor<1024x4096xf16>
    %49 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.5.attn_output.weight = util.global.load @__auto.blk.5.attn_output.weight : tensor<4096x4096xf16>
    %50 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.5.ffn_norm.weight = util.global.load @__auto.blk.5.ffn_norm.weight : tensor<4096xf32>
    %51 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.ffn_gate.weight = util.global.load @__auto.blk.5.ffn_gate.weight : tensor<14336x4096xf16>
    %52 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.5.ffn_up.weight = util.global.load @__auto.blk.5.ffn_up.weight : tensor<14336x4096xf16>
    %53 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.5.ffn_down.weight = util.global.load @__auto.blk.5.ffn_down.weight : tensor<4096x14336xf16>
    %54 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.6.attn_norm.weight = util.global.load @__auto.blk.6.attn_norm.weight : tensor<4096xf32>
    %55 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.attn_q.weight = util.global.load @__auto.blk.6.attn_q.weight : tensor<4096x4096xf16>
    %56 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.6.attn_k.weight = util.global.load @__auto.blk.6.attn_k.weight : tensor<1024x4096xf16>
    %57 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.6.attn_v.weight = util.global.load @__auto.blk.6.attn_v.weight : tensor<1024x4096xf16>
    %58 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.6.attn_output.weight = util.global.load @__auto.blk.6.attn_output.weight : tensor<4096x4096xf16>
    %59 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.6.ffn_norm.weight = util.global.load @__auto.blk.6.ffn_norm.weight : tensor<4096xf32>
    %60 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.ffn_gate.weight = util.global.load @__auto.blk.6.ffn_gate.weight : tensor<14336x4096xf16>
    %61 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.6.ffn_up.weight = util.global.load @__auto.blk.6.ffn_up.weight : tensor<14336x4096xf16>
    %62 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.6.ffn_down.weight = util.global.load @__auto.blk.6.ffn_down.weight : tensor<4096x14336xf16>
    %63 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.7.attn_norm.weight = util.global.load @__auto.blk.7.attn_norm.weight : tensor<4096xf32>
    %64 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.attn_q.weight = util.global.load @__auto.blk.7.attn_q.weight : tensor<4096x4096xf16>
    %65 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.7.attn_k.weight = util.global.load @__auto.blk.7.attn_k.weight : tensor<1024x4096xf16>
    %66 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.7.attn_v.weight = util.global.load @__auto.blk.7.attn_v.weight : tensor<1024x4096xf16>
    %67 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.7.attn_output.weight = util.global.load @__auto.blk.7.attn_output.weight : tensor<4096x4096xf16>
    %68 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.7.ffn_norm.weight = util.global.load @__auto.blk.7.ffn_norm.weight : tensor<4096xf32>
    %69 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.ffn_gate.weight = util.global.load @__auto.blk.7.ffn_gate.weight : tensor<14336x4096xf16>
    %70 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.7.ffn_up.weight = util.global.load @__auto.blk.7.ffn_up.weight : tensor<14336x4096xf16>
    %71 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.7.ffn_down.weight = util.global.load @__auto.blk.7.ffn_down.weight : tensor<4096x14336xf16>
    %72 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.8.attn_norm.weight = util.global.load @__auto.blk.8.attn_norm.weight : tensor<4096xf32>
    %73 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.attn_q.weight = util.global.load @__auto.blk.8.attn_q.weight : tensor<4096x4096xf16>
    %74 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.8.attn_k.weight = util.global.load @__auto.blk.8.attn_k.weight : tensor<1024x4096xf16>
    %75 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.8.attn_v.weight = util.global.load @__auto.blk.8.attn_v.weight : tensor<1024x4096xf16>
    %76 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.8.attn_output.weight = util.global.load @__auto.blk.8.attn_output.weight : tensor<4096x4096xf16>
    %77 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.8.ffn_norm.weight = util.global.load @__auto.blk.8.ffn_norm.weight : tensor<4096xf32>
    %78 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.ffn_gate.weight = util.global.load @__auto.blk.8.ffn_gate.weight : tensor<14336x4096xf16>
    %79 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.8.ffn_up.weight = util.global.load @__auto.blk.8.ffn_up.weight : tensor<14336x4096xf16>
    %80 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.8.ffn_down.weight = util.global.load @__auto.blk.8.ffn_down.weight : tensor<4096x14336xf16>
    %81 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.9.attn_norm.weight = util.global.load @__auto.blk.9.attn_norm.weight : tensor<4096xf32>
    %82 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.attn_q.weight = util.global.load @__auto.blk.9.attn_q.weight : tensor<4096x4096xf16>
    %83 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.9.attn_k.weight = util.global.load @__auto.blk.9.attn_k.weight : tensor<1024x4096xf16>
    %84 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.9.attn_v.weight = util.global.load @__auto.blk.9.attn_v.weight : tensor<1024x4096xf16>
    %85 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.9.attn_output.weight = util.global.load @__auto.blk.9.attn_output.weight : tensor<4096x4096xf16>
    %86 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.9.ffn_norm.weight = util.global.load @__auto.blk.9.ffn_norm.weight : tensor<4096xf32>
    %87 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.ffn_gate.weight = util.global.load @__auto.blk.9.ffn_gate.weight : tensor<14336x4096xf16>
    %88 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.9.ffn_up.weight = util.global.load @__auto.blk.9.ffn_up.weight : tensor<14336x4096xf16>
    %89 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.9.ffn_down.weight = util.global.load @__auto.blk.9.ffn_down.weight : tensor<4096x14336xf16>
    %90 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.10.attn_norm.weight = util.global.load @__auto.blk.10.attn_norm.weight : tensor<4096xf32>
    %91 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.attn_q.weight = util.global.load @__auto.blk.10.attn_q.weight : tensor<4096x4096xf16>
    %92 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.10.attn_k.weight = util.global.load @__auto.blk.10.attn_k.weight : tensor<1024x4096xf16>
    %93 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.10.attn_v.weight = util.global.load @__auto.blk.10.attn_v.weight : tensor<1024x4096xf16>
    %94 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.10.attn_output.weight = util.global.load @__auto.blk.10.attn_output.weight : tensor<4096x4096xf16>
    %95 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.10.ffn_norm.weight = util.global.load @__auto.blk.10.ffn_norm.weight : tensor<4096xf32>
    %96 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.ffn_gate.weight = util.global.load @__auto.blk.10.ffn_gate.weight : tensor<14336x4096xf16>
    %97 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.10.ffn_up.weight = util.global.load @__auto.blk.10.ffn_up.weight : tensor<14336x4096xf16>
    %98 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.10.ffn_down.weight = util.global.load @__auto.blk.10.ffn_down.weight : tensor<4096x14336xf16>
    %99 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.11.attn_norm.weight = util.global.load @__auto.blk.11.attn_norm.weight : tensor<4096xf32>
    %100 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.attn_q.weight = util.global.load @__auto.blk.11.attn_q.weight : tensor<4096x4096xf16>
    %101 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.11.attn_k.weight = util.global.load @__auto.blk.11.attn_k.weight : tensor<1024x4096xf16>
    %102 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.11.attn_v.weight = util.global.load @__auto.blk.11.attn_v.weight : tensor<1024x4096xf16>
    %103 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.11.attn_output.weight = util.global.load @__auto.blk.11.attn_output.weight : tensor<4096x4096xf16>
    %104 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.11.ffn_norm.weight = util.global.load @__auto.blk.11.ffn_norm.weight : tensor<4096xf32>
    %105 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.ffn_gate.weight = util.global.load @__auto.blk.11.ffn_gate.weight : tensor<14336x4096xf16>
    %106 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.11.ffn_up.weight = util.global.load @__auto.blk.11.ffn_up.weight : tensor<14336x4096xf16>
    %107 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.11.ffn_down.weight = util.global.load @__auto.blk.11.ffn_down.weight : tensor<4096x14336xf16>
    %108 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.12.attn_norm.weight = util.global.load @__auto.blk.12.attn_norm.weight : tensor<4096xf32>
    %109 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.attn_q.weight = util.global.load @__auto.blk.12.attn_q.weight : tensor<4096x4096xf16>
    %110 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.12.attn_k.weight = util.global.load @__auto.blk.12.attn_k.weight : tensor<1024x4096xf16>
    %111 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.12.attn_v.weight = util.global.load @__auto.blk.12.attn_v.weight : tensor<1024x4096xf16>
    %112 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.12.attn_output.weight = util.global.load @__auto.blk.12.attn_output.weight : tensor<4096x4096xf16>
    %113 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.12.ffn_norm.weight = util.global.load @__auto.blk.12.ffn_norm.weight : tensor<4096xf32>
    %114 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.ffn_gate.weight = util.global.load @__auto.blk.12.ffn_gate.weight : tensor<14336x4096xf16>
    %115 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.12.ffn_up.weight = util.global.load @__auto.blk.12.ffn_up.weight : tensor<14336x4096xf16>
    %116 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.12.ffn_down.weight = util.global.load @__auto.blk.12.ffn_down.weight : tensor<4096x14336xf16>
    %117 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16>
    %__auto.blk.13.attn_norm.weight = util.global.load @__auto.blk.13.attn_norm.weight : tensor<4096xf32>
    %118 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.attn_q.weight = util.global.load @__auto.blk.13.attn_q.weight : tensor<4096x4096xf16>
    %119 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.13.attn_k.weight = util.global.load @__auto.blk.13.attn_k.weight : tensor<1024x4096xf16>
    %120 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.13.attn_v.weight = util.global.load @__auto.blk.13.attn_v.weight : tensor<1024x4096xf16>
    %121 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16>
    %__auto.blk.13.attn_output.weight = util.global.load @__auto.blk.13.attn_output.weight : tensor<4096x4096xf16>
    %122 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16>
    %__auto.blk.13.ffn_norm.weight = util.global.load @__auto.blk.13.ffn_norm.weight : tensor<4096xf32>
    %123 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.ffn_gate.weight = util.global.load @__auto.blk.13.ffn_gate.weight : tensor<14336x4096xf16>
    %124 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16>
    %__auto.blk.13.ffn_up.weight = util.global.load @__auto.blk.13.ffn_up.weight : tensor<14336x4096xf16>
    %125 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight : 
tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.13.ffn_down.weight = util.global.load @__auto.blk.13.ffn_down.weight : tensor<4096x14336xf16> %126 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.14.attn_norm.weight = util.global.load @__auto.blk.14.attn_norm.weight : tensor<4096xf32> %127 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.14.attn_q.weight = util.global.load @__auto.blk.14.attn_q.weight : tensor<4096x4096xf16> %128 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.14.attn_k.weight = util.global.load @__auto.blk.14.attn_k.weight : tensor<1024x4096xf16> %129 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.14.attn_v.weight = util.global.load @__auto.blk.14.attn_v.weight : tensor<1024x4096xf16> %130 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.14.attn_output.weight = util.global.load @__auto.blk.14.attn_output.weight : tensor<4096x4096xf16> %131 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.14.ffn_norm.weight = util.global.load @__auto.blk.14.ffn_norm.weight : tensor<4096xf32> %132 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.14.ffn_gate.weight = util.global.load @__auto.blk.14.ffn_gate.weight : tensor<14336x4096xf16> %133 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.14.ffn_up.weight = util.global.load @__auto.blk.14.ffn_up.weight : tensor<14336x4096xf16> %134 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.14.ffn_down.weight = util.global.load @__auto.blk.14.ffn_down.weight : tensor<4096x14336xf16> %135 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.15.attn_norm.weight = util.global.load @__auto.blk.15.attn_norm.weight : tensor<4096xf32> %136 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.15.attn_q.weight = util.global.load @__auto.blk.15.attn_q.weight : tensor<4096x4096xf16> %137 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.15.attn_k.weight = util.global.load @__auto.blk.15.attn_k.weight : tensor<1024x4096xf16> %138 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.15.attn_v.weight = util.global.load @__auto.blk.15.attn_v.weight : tensor<1024x4096xf16> %139 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.15.attn_output.weight = util.global.load @__auto.blk.15.attn_output.weight : tensor<4096x4096xf16> %140 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.15.ffn_norm.weight = util.global.load @__auto.blk.15.ffn_norm.weight : 
tensor<4096xf32> %141 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.15.ffn_gate.weight = util.global.load @__auto.blk.15.ffn_gate.weight : tensor<14336x4096xf16> %142 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.15.ffn_up.weight = util.global.load @__auto.blk.15.ffn_up.weight : tensor<14336x4096xf16> %143 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.15.ffn_down.weight = util.global.load @__auto.blk.15.ffn_down.weight : tensor<4096x14336xf16> %144 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.16.attn_norm.weight = util.global.load @__auto.blk.16.attn_norm.weight : tensor<4096xf32> %145 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.16.attn_q.weight = util.global.load @__auto.blk.16.attn_q.weight : tensor<4096x4096xf16> %146 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.16.attn_k.weight = util.global.load @__auto.blk.16.attn_k.weight : tensor<1024x4096xf16> %147 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.16.attn_v.weight = util.global.load @__auto.blk.16.attn_v.weight : tensor<1024x4096xf16> %148 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.16.attn_output.weight = util.global.load @__auto.blk.16.attn_output.weight : tensor<4096x4096xf16> %149 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.16.ffn_norm.weight = util.global.load @__auto.blk.16.ffn_norm.weight : tensor<4096xf32> %150 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.16.ffn_gate.weight = util.global.load @__auto.blk.16.ffn_gate.weight : tensor<14336x4096xf16> %151 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.16.ffn_up.weight = util.global.load @__auto.blk.16.ffn_up.weight : tensor<14336x4096xf16> %152 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.16.ffn_down.weight = util.global.load @__auto.blk.16.ffn_down.weight : tensor<4096x14336xf16> %153 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.17.attn_norm.weight = util.global.load @__auto.blk.17.attn_norm.weight : tensor<4096xf32> %154 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.17.attn_q.weight = util.global.load @__auto.blk.17.attn_q.weight : tensor<4096x4096xf16> %155 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.17.attn_k.weight = util.global.load @__auto.blk.17.attn_k.weight : tensor<1024x4096xf16> %156 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> 
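// NOTE: every parameter in this region follows the same two-op idiom: a
// `util.global.load` materializes the named #stream.parameter global as a
// builtin tensor, and `torch_c.from_builtin_tensor` rebrands it as a
// !torch.vtensor for the Torch-dialect ops further down. Per block i, the
// pairs amount to roughly the following (a PyTorch-level sketch, not the
// exporter's actual code; `params` and `load_block_weights` are hypothetical):
//
//   def load_block_weights(params: dict, i: int) -> dict:
//       # One entry per util.global.load / from_builtin_tensor pair for blk.<i>.
//       names = ["attn_norm", "attn_q", "attn_k", "attn_v",
//                "attn_output", "ffn_norm", "ffn_gate", "ffn_up", "ffn_down"]
//       return {n: params[f"blk.{i}.{n}.weight"] for n in names}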
%__auto.blk.17.attn_v.weight = util.global.load @__auto.blk.17.attn_v.weight : tensor<1024x4096xf16> %157 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.17.attn_output.weight = util.global.load @__auto.blk.17.attn_output.weight : tensor<4096x4096xf16> %158 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.17.ffn_norm.weight = util.global.load @__auto.blk.17.ffn_norm.weight : tensor<4096xf32> %159 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.17.ffn_gate.weight = util.global.load @__auto.blk.17.ffn_gate.weight : tensor<14336x4096xf16> %160 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.17.ffn_up.weight = util.global.load @__auto.blk.17.ffn_up.weight : tensor<14336x4096xf16> %161 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.17.ffn_down.weight = util.global.load @__auto.blk.17.ffn_down.weight : tensor<4096x14336xf16> %162 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.18.attn_norm.weight = util.global.load @__auto.blk.18.attn_norm.weight : tensor<4096xf32> %163 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.18.attn_q.weight = util.global.load @__auto.blk.18.attn_q.weight : tensor<4096x4096xf16> %164 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.18.attn_k.weight = util.global.load @__auto.blk.18.attn_k.weight : tensor<1024x4096xf16> %165 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.18.attn_v.weight = util.global.load @__auto.blk.18.attn_v.weight : tensor<1024x4096xf16> %166 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.18.attn_output.weight = util.global.load @__auto.blk.18.attn_output.weight : tensor<4096x4096xf16> %167 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.18.ffn_norm.weight = util.global.load @__auto.blk.18.ffn_norm.weight : tensor<4096xf32> %168 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.18.ffn_gate.weight = util.global.load @__auto.blk.18.ffn_gate.weight : tensor<14336x4096xf16> %169 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.18.ffn_up.weight = util.global.load @__auto.blk.18.ffn_up.weight : tensor<14336x4096xf16> %170 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.18.ffn_down.weight = util.global.load @__auto.blk.18.ffn_down.weight : tensor<4096x14336xf16> %171 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.19.attn_norm.weight = util.global.load @__auto.blk.19.attn_norm.weight : tensor<4096xf32> %172 = torch_c.from_builtin_tensor 
%__auto.blk.19.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.19.attn_q.weight = util.global.load @__auto.blk.19.attn_q.weight : tensor<4096x4096xf16> %173 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.19.attn_k.weight = util.global.load @__auto.blk.19.attn_k.weight : tensor<1024x4096xf16> %174 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.19.attn_v.weight = util.global.load @__auto.blk.19.attn_v.weight : tensor<1024x4096xf16> %175 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.19.attn_output.weight = util.global.load @__auto.blk.19.attn_output.weight : tensor<4096x4096xf16> %176 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.19.ffn_norm.weight = util.global.load @__auto.blk.19.ffn_norm.weight : tensor<4096xf32> %177 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.19.ffn_gate.weight = util.global.load @__auto.blk.19.ffn_gate.weight : tensor<14336x4096xf16> %178 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.19.ffn_up.weight = util.global.load @__auto.blk.19.ffn_up.weight : tensor<14336x4096xf16> %179 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.19.ffn_down.weight = util.global.load @__auto.blk.19.ffn_down.weight : tensor<4096x14336xf16> %180 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.20.attn_norm.weight = util.global.load @__auto.blk.20.attn_norm.weight : tensor<4096xf32> %181 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.20.attn_q.weight = util.global.load @__auto.blk.20.attn_q.weight : tensor<4096x4096xf16> %182 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.20.attn_k.weight = util.global.load @__auto.blk.20.attn_k.weight : tensor<1024x4096xf16> %183 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.20.attn_v.weight = util.global.load @__auto.blk.20.attn_v.weight : tensor<1024x4096xf16> %184 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.20.attn_output.weight = util.global.load @__auto.blk.20.attn_output.weight : tensor<4096x4096xf16> %185 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.20.ffn_norm.weight = util.global.load @__auto.blk.20.ffn_norm.weight : tensor<4096xf32> %186 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.20.ffn_gate.weight = util.global.load @__auto.blk.20.ffn_gate.weight : tensor<14336x4096xf16> %187 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.20.ffn_up.weight = util.global.load @__auto.blk.20.ffn_up.weight 
: tensor<14336x4096xf16> %188 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.20.ffn_down.weight = util.global.load @__auto.blk.20.ffn_down.weight : tensor<4096x14336xf16> %189 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.21.attn_norm.weight = util.global.load @__auto.blk.21.attn_norm.weight : tensor<4096xf32> %190 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.21.attn_q.weight = util.global.load @__auto.blk.21.attn_q.weight : tensor<4096x4096xf16> %191 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.21.attn_k.weight = util.global.load @__auto.blk.21.attn_k.weight : tensor<1024x4096xf16> %192 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.21.attn_v.weight = util.global.load @__auto.blk.21.attn_v.weight : tensor<1024x4096xf16> %193 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.21.attn_output.weight = util.global.load @__auto.blk.21.attn_output.weight : tensor<4096x4096xf16> %194 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.21.ffn_norm.weight = util.global.load @__auto.blk.21.ffn_norm.weight : tensor<4096xf32> %195 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.21.ffn_gate.weight = util.global.load @__auto.blk.21.ffn_gate.weight : tensor<14336x4096xf16> %196 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.21.ffn_up.weight = util.global.load @__auto.blk.21.ffn_up.weight : tensor<14336x4096xf16> %197 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.21.ffn_down.weight = util.global.load @__auto.blk.21.ffn_down.weight : tensor<4096x14336xf16> %198 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.22.attn_norm.weight = util.global.load @__auto.blk.22.attn_norm.weight : tensor<4096xf32> %199 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.22.attn_q.weight = util.global.load @__auto.blk.22.attn_q.weight : tensor<4096x4096xf16> %200 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.22.attn_k.weight = util.global.load @__auto.blk.22.attn_k.weight : tensor<1024x4096xf16> %201 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.22.attn_v.weight = util.global.load @__auto.blk.22.attn_v.weight : tensor<1024x4096xf16> %202 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.22.attn_output.weight = util.global.load @__auto.blk.22.attn_output.weight : tensor<4096x4096xf16> %203 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> 
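// NOTE: the projection shapes spell out grouped-query attention: attn_q is
// 4096x4096 while attn_k/attn_v are 1024x4096, and the views later in the
// function split these into 32 query heads and 8 KV heads of dim 128, so four
// query heads share each KV head. A quick consistency check of the numbers
// read off the tensor types (a sketch, not source code):
//
//   n_q_heads, n_kv_heads, head_dim, d_model = 32, 8, 128, 4096
//   assert n_q_heads * head_dim == d_model      # the 4096x4096 q projection
//   assert n_kv_heads * head_dim == 1024        # the 1024x4096 k/v projections
//   assert n_q_heads % n_kv_heads == 0          # 4 query heads per KV head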
%__auto.blk.22.ffn_norm.weight = util.global.load @__auto.blk.22.ffn_norm.weight : tensor<4096xf32> %204 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.22.ffn_gate.weight = util.global.load @__auto.blk.22.ffn_gate.weight : tensor<14336x4096xf16> %205 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.22.ffn_up.weight = util.global.load @__auto.blk.22.ffn_up.weight : tensor<14336x4096xf16> %206 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.22.ffn_down.weight = util.global.load @__auto.blk.22.ffn_down.weight : tensor<4096x14336xf16> %207 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.23.attn_norm.weight = util.global.load @__auto.blk.23.attn_norm.weight : tensor<4096xf32> %208 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.23.attn_q.weight = util.global.load @__auto.blk.23.attn_q.weight : tensor<4096x4096xf16> %209 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.23.attn_k.weight = util.global.load @__auto.blk.23.attn_k.weight : tensor<1024x4096xf16> %210 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.23.attn_v.weight = util.global.load @__auto.blk.23.attn_v.weight : tensor<1024x4096xf16> %211 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.23.attn_output.weight = util.global.load @__auto.blk.23.attn_output.weight : tensor<4096x4096xf16> %212 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.23.ffn_norm.weight = util.global.load @__auto.blk.23.ffn_norm.weight : tensor<4096xf32> %213 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.23.ffn_gate.weight = util.global.load @__auto.blk.23.ffn_gate.weight : tensor<14336x4096xf16> %214 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.23.ffn_up.weight = util.global.load @__auto.blk.23.ffn_up.weight : tensor<14336x4096xf16> %215 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.23.ffn_down.weight = util.global.load @__auto.blk.23.ffn_down.weight : tensor<4096x14336xf16> %216 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.24.attn_norm.weight = util.global.load @__auto.blk.24.attn_norm.weight : tensor<4096xf32> %217 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.24.attn_q.weight = util.global.load @__auto.blk.24.attn_q.weight : tensor<4096x4096xf16> %218 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.24.attn_k.weight = util.global.load @__auto.blk.24.attn_k.weight : tensor<1024x4096xf16> %219 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight : 
tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.24.attn_v.weight = util.global.load @__auto.blk.24.attn_v.weight : tensor<1024x4096xf16> %220 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.24.attn_output.weight = util.global.load @__auto.blk.24.attn_output.weight : tensor<4096x4096xf16> %221 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.24.ffn_norm.weight = util.global.load @__auto.blk.24.ffn_norm.weight : tensor<4096xf32> %222 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.24.ffn_gate.weight = util.global.load @__auto.blk.24.ffn_gate.weight : tensor<14336x4096xf16> %223 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.24.ffn_up.weight = util.global.load @__auto.blk.24.ffn_up.weight : tensor<14336x4096xf16> %224 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.24.ffn_down.weight = util.global.load @__auto.blk.24.ffn_down.weight : tensor<4096x14336xf16> %225 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.25.attn_norm.weight = util.global.load @__auto.blk.25.attn_norm.weight : tensor<4096xf32> %226 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.25.attn_q.weight = util.global.load @__auto.blk.25.attn_q.weight : tensor<4096x4096xf16> %227 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.25.attn_k.weight = util.global.load @__auto.blk.25.attn_k.weight : tensor<1024x4096xf16> %228 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.25.attn_v.weight = util.global.load @__auto.blk.25.attn_v.weight : tensor<1024x4096xf16> %229 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.25.attn_output.weight = util.global.load @__auto.blk.25.attn_output.weight : tensor<4096x4096xf16> %230 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.25.ffn_norm.weight = util.global.load @__auto.blk.25.ffn_norm.weight : tensor<4096xf32> %231 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.25.ffn_gate.weight = util.global.load @__auto.blk.25.ffn_gate.weight : tensor<14336x4096xf16> %232 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.25.ffn_up.weight = util.global.load @__auto.blk.25.ffn_up.weight : tensor<14336x4096xf16> %233 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.25.ffn_down.weight = util.global.load @__auto.blk.25.ffn_down.weight : tensor<4096x14336xf16> %234 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.26.attn_norm.weight = util.global.load @__auto.blk.26.attn_norm.weight : 
tensor<4096xf32> %235 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.26.attn_q.weight = util.global.load @__auto.blk.26.attn_q.weight : tensor<4096x4096xf16> %236 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.26.attn_k.weight = util.global.load @__auto.blk.26.attn_k.weight : tensor<1024x4096xf16> %237 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.26.attn_v.weight = util.global.load @__auto.blk.26.attn_v.weight : tensor<1024x4096xf16> %238 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.26.attn_output.weight = util.global.load @__auto.blk.26.attn_output.weight : tensor<4096x4096xf16> %239 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.26.ffn_norm.weight = util.global.load @__auto.blk.26.ffn_norm.weight : tensor<4096xf32> %240 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.26.ffn_gate.weight = util.global.load @__auto.blk.26.ffn_gate.weight : tensor<14336x4096xf16> %241 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.26.ffn_up.weight = util.global.load @__auto.blk.26.ffn_up.weight : tensor<14336x4096xf16> %242 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.26.ffn_down.weight = util.global.load @__auto.blk.26.ffn_down.weight : tensor<4096x14336xf16> %243 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.27.attn_norm.weight = util.global.load @__auto.blk.27.attn_norm.weight : tensor<4096xf32> %244 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.27.attn_q.weight = util.global.load @__auto.blk.27.attn_q.weight : tensor<4096x4096xf16> %245 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.27.attn_k.weight = util.global.load @__auto.blk.27.attn_k.weight : tensor<1024x4096xf16> %246 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.27.attn_v.weight = util.global.load @__auto.blk.27.attn_v.weight : tensor<1024x4096xf16> %247 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.27.attn_output.weight = util.global.load @__auto.blk.27.attn_output.weight : tensor<4096x4096xf16> %248 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.27.ffn_norm.weight = util.global.load @__auto.blk.27.ffn_norm.weight : tensor<4096xf32> %249 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.27.ffn_gate.weight = util.global.load @__auto.blk.27.ffn_gate.weight : tensor<14336x4096xf16> %250 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> 
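// NOTE: the dtype split is deliberate: matmul weights (attn_*, ffn_*) are
// stored in f16, while the attn_norm/ffn_norm vectors stay f32 and, as the
// RMSNorm ops further down show, normalization runs in f32 before the result
// is cast back to f16. A minimal illustration of that mixed-precision pattern
// (a sketch under those assumptions, with made-up values):
//
//   import torch
//
//   x = torch.randn(4, 16, 4096, dtype=torch.float16)  # f16 activations
//   w = torch.ones(4096, dtype=torch.float32)          # f32 norm weight
//   y = (w * x.float()).to(torch.float16)              # compute in f32, emit f16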
%__auto.blk.27.ffn_up.weight = util.global.load @__auto.blk.27.ffn_up.weight : tensor<14336x4096xf16> %251 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.27.ffn_down.weight = util.global.load @__auto.blk.27.ffn_down.weight : tensor<4096x14336xf16> %252 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.28.attn_norm.weight = util.global.load @__auto.blk.28.attn_norm.weight : tensor<4096xf32> %253 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.28.attn_q.weight = util.global.load @__auto.blk.28.attn_q.weight : tensor<4096x4096xf16> %254 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.28.attn_k.weight = util.global.load @__auto.blk.28.attn_k.weight : tensor<1024x4096xf16> %255 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.28.attn_v.weight = util.global.load @__auto.blk.28.attn_v.weight : tensor<1024x4096xf16> %256 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.28.attn_output.weight = util.global.load @__auto.blk.28.attn_output.weight : tensor<4096x4096xf16> %257 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.28.ffn_norm.weight = util.global.load @__auto.blk.28.ffn_norm.weight : tensor<4096xf32> %258 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.28.ffn_gate.weight = util.global.load @__auto.blk.28.ffn_gate.weight : tensor<14336x4096xf16> %259 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.28.ffn_up.weight = util.global.load @__auto.blk.28.ffn_up.weight : tensor<14336x4096xf16> %260 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.28.ffn_down.weight = util.global.load @__auto.blk.28.ffn_down.weight : tensor<4096x14336xf16> %261 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.29.attn_norm.weight = util.global.load @__auto.blk.29.attn_norm.weight : tensor<4096xf32> %262 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.29.attn_q.weight = util.global.load @__auto.blk.29.attn_q.weight : tensor<4096x4096xf16> %263 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.29.attn_k.weight = util.global.load @__auto.blk.29.attn_k.weight : tensor<1024x4096xf16> %264 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.29.attn_v.weight = util.global.load @__auto.blk.29.attn_v.weight : tensor<1024x4096xf16> %265 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.29.attn_output.weight = util.global.load @__auto.blk.29.attn_output.weight : tensor<4096x4096xf16> %266 = torch_c.from_builtin_tensor 
%__auto.blk.29.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.29.ffn_norm.weight = util.global.load @__auto.blk.29.ffn_norm.weight : tensor<4096xf32> %267 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.29.ffn_gate.weight = util.global.load @__auto.blk.29.ffn_gate.weight : tensor<14336x4096xf16> %268 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.29.ffn_up.weight = util.global.load @__auto.blk.29.ffn_up.weight : tensor<14336x4096xf16> %269 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.29.ffn_down.weight = util.global.load @__auto.blk.29.ffn_down.weight : tensor<4096x14336xf16> %270 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.30.attn_norm.weight = util.global.load @__auto.blk.30.attn_norm.weight : tensor<4096xf32> %271 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.30.attn_q.weight = util.global.load @__auto.blk.30.attn_q.weight : tensor<4096x4096xf16> %272 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.30.attn_k.weight = util.global.load @__auto.blk.30.attn_k.weight : tensor<1024x4096xf16> %273 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.30.attn_v.weight = util.global.load @__auto.blk.30.attn_v.weight : tensor<1024x4096xf16> %274 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.30.attn_output.weight = util.global.load @__auto.blk.30.attn_output.weight : tensor<4096x4096xf16> %275 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.30.ffn_norm.weight = util.global.load @__auto.blk.30.ffn_norm.weight : tensor<4096xf32> %276 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.30.ffn_gate.weight = util.global.load @__auto.blk.30.ffn_gate.weight : tensor<14336x4096xf16> %277 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.30.ffn_up.weight = util.global.load @__auto.blk.30.ffn_up.weight : tensor<14336x4096xf16> %278 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.30.ffn_down.weight = util.global.load @__auto.blk.30.ffn_down.weight : tensor<4096x14336xf16> %279 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.blk.31.attn_norm.weight = util.global.load @__auto.blk.31.attn_norm.weight : tensor<4096xf32> %280 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.31.attn_q.weight = util.global.load @__auto.blk.31.attn_q.weight : tensor<4096x4096xf16> %281 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.31.attn_k.weight = util.global.load 
@__auto.blk.31.attn_k.weight : tensor<1024x4096xf16> %282 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.31.attn_v.weight = util.global.load @__auto.blk.31.attn_v.weight : tensor<1024x4096xf16> %283 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight : tensor<1024x4096xf16> -> !torch.vtensor<[1024,4096],f16> %__auto.blk.31.attn_output.weight = util.global.load @__auto.blk.31.attn_output.weight : tensor<4096x4096xf16> %284 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight : tensor<4096x4096xf16> -> !torch.vtensor<[4096,4096],f16> %__auto.blk.31.ffn_norm.weight = util.global.load @__auto.blk.31.ffn_norm.weight : tensor<4096xf32> %285 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.blk.31.ffn_gate.weight = util.global.load @__auto.blk.31.ffn_gate.weight : tensor<14336x4096xf16> %286 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.31.ffn_up.weight = util.global.load @__auto.blk.31.ffn_up.weight : tensor<14336x4096xf16> %287 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight : tensor<14336x4096xf16> -> !torch.vtensor<[14336,4096],f16> %__auto.blk.31.ffn_down.weight = util.global.load @__auto.blk.31.ffn_down.weight : tensor<4096x14336xf16> %288 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight : tensor<4096x14336xf16> -> !torch.vtensor<[4096,14336],f16> %__auto.output_norm.weight = util.global.load @__auto.output_norm.weight : tensor<4096xf32> %289 = torch_c.from_builtin_tensor %__auto.output_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32> %__auto.output.weight = util.global.load @__auto.output.weight : tensor<128256x4096xf16> %290 = torch_c.from_builtin_tensor %__auto.output.weight : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16> %291 = torch.copy.to_vtensor %arg3 : !torch.vtensor<[?,2097152],f16> %292 = torch.symbolic_int "s1" {min_val = 2, max_val = 4095} : !torch.int %293 = torch.symbolic_int "s2" {min_val = 2, max_val = 9223372036854775806} : !torch.int torch.bind_symbolic_shape %arg0, [%292], affine_map<()[s0] -> (4, s0 * 32)> : !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %arg2, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %291, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int1 = torch.constant.int 1 %294 = torch.aten.size.int %arg0, %int1 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.int %int0 = torch.constant.int 0 %int1_0 = torch.constant.int 1 %none = torch.constant.none %none_1 = torch.constant.none %cpu = torch.constant.device "cpu" %false = torch.constant.bool false %295 = torch.aten.arange.start_step %int0, %294, %int1_0, %none, %none_1, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %295, [%292], affine_map<()[s0] -> (s0 * 32)> : !torch.vtensor<[?],si64> %int-1 = torch.constant.int -1 %296 = torch.aten.unsqueeze %arg1, %int-1 : !torch.vtensor<[4],si64>, !torch.int -> !torch.vtensor<[4,1],si64> %297 = torch.aten.ge.Tensor %295, %296 : !torch.vtensor<[?],si64>, !torch.vtensor<[4,1],si64> -> !torch.vtensor<[4,?],i1> torch.bind_symbolic_shape %297, [%292], affine_map<()[s0] -> (4, s0 * 32)> : !torch.vtensor<[4,?],i1> %int1_2 = torch.constant.int 1 %int1_3 = torch.constant.int 1 
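// NOTE: the ops just above build the per-row position mask: %295 is
// arange(0, T) over the current token count T, %296 lifts %arg1 (one si64 per
// batch row, plausibly the number of valid tokens in that row) to [4,1], and
// %297 = ge(...) flags every position at or past that value. An equivalent
// PyTorch sketch (`T` and `lengths` are made-up example values):
//
//   import torch
//
//   T = 64                                        # stands in for s1 * 32
//   lengths = torch.tensor([3, 10, 0, 64])        # per-row values like %arg1
//   pos = torch.arange(T)                         # %295
//   row_mask = pos[None, :] >= lengths[:, None]   # %297: [4, T] bool
//
// True entries are the positions that the bias below will suppress.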
%298 = torch.prim.ListConstruct %int1_2, %int1_3 : (!torch.int, !torch.int) -> !torch.list<int> %int11 = torch.constant.int 11 %none_4 = torch.constant.none %cpu_5 = torch.constant.device "cpu" %false_6 = torch.constant.bool false %299 = torch.aten.ones %298, %int11, %none_4, %cpu_5, %false_6 : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[1,1],i1> %int131072 = torch.constant.int 131072 %int131072_7 = torch.constant.int 131072 %300 = torch.prim.ListConstruct %int131072, %int131072_7 : (!torch.int, !torch.int) -> !torch.list<int> %false_8 = torch.constant.bool false %301 = torch.aten.expand %299, %300, %false_8 : !torch.vtensor<[1,1],i1>, !torch.list<int>, !torch.bool -> !torch.vtensor<[131072,131072],i1> %int1_9 = torch.constant.int 1 %302 = torch.aten.triu %301, %int1_9 : !torch.vtensor<[131072,131072],i1>, !torch.int -> !torch.vtensor<[131072,131072],i1> %int0_10 = torch.constant.int 0 %303 = torch.aten.unsqueeze %302, %int0_10 : !torch.vtensor<[131072,131072],i1>, !torch.int -> !torch.vtensor<[1,131072,131072],i1> %int1_11 = torch.constant.int 1 %304 = torch.aten.unsqueeze %303, %int1_11 : !torch.vtensor<[1,131072,131072],i1>, !torch.int -> !torch.vtensor<[1,1,131072,131072],i1> %int2 = torch.constant.int 2 %int0_12 = torch.constant.int 0 %int9223372036854775807 = torch.constant.int 9223372036854775807 %int1_13 = torch.constant.int 1 %305 = torch.aten.slice.Tensor %304, %int2, %int0_12, %int9223372036854775807, %int1_13 : !torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,131072,131072],i1> %int3 = torch.constant.int 3 %int0_14 = torch.constant.int 0 %int9223372036854775807_15 = torch.constant.int 9223372036854775807 %int1_16 = torch.constant.int 1 %306 = torch.aten.slice.Tensor %305, %int3, %int0_14, %int9223372036854775807_15, %int1_16 : !torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,131072,131072],i1> %int0_17 = torch.constant.int 0 %int0_18 = torch.constant.int 0 %int9223372036854775807_19 = torch.constant.int 9223372036854775807 %int1_20 = torch.constant.int 1 %307 = torch.aten.slice.Tensor %306, %int0_17, %int0_18, %int9223372036854775807_19, %int1_20 : !torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,131072,131072],i1> %int1_21 = torch.constant.int 1 %int0_22 = torch.constant.int 0 %int9223372036854775807_23 = torch.constant.int 9223372036854775807 %int1_24 = torch.constant.int 1 %308 = torch.aten.slice.Tensor %307, %int1_21, %int0_22, %int9223372036854775807_23, %int1_24 : !torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,131072,131072],i1> %int0_25 = torch.constant.int 0 %309 = torch.aten.size.int %295, %int0_25 : !torch.vtensor<[?],si64>, !torch.int -> !torch.int %int2_26 = torch.constant.int 2 %int0_27 = torch.constant.int 0 %int1_28 = torch.constant.int 1 %310 = torch.aten.slice.Tensor %308, %int2_26, %int0_27, %309, %int1_28 : !torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,131072],i1> torch.bind_symbolic_shape %310, [%292], affine_map<()[s0] -> (1, 1, s0 * 32, 131072)> : !torch.vtensor<[1,1,?,131072],i1> %int3_29 = torch.constant.int 3 %int0_30 = torch.constant.int 0 %int1_31 = torch.constant.int 1 %311 = torch.aten.slice.Tensor %310, %int3_29, %int0_30, %309, %int1_31 : !torch.vtensor<[1,1,?,131072],i1>, !torch.int, !torch.int, 
!torch.int, !torch.int -> !torch.vtensor<[1,1,?,?],i1> torch.bind_symbolic_shape %311, [%292], affine_map<()[s0] -> (1, 1, s0 * 32, s0 * 32)> : !torch.vtensor<[1,1,?,?],i1> %int0_32 = torch.constant.int 0 %int0_33 = torch.constant.int 0 %int9223372036854775807_34 = torch.constant.int 9223372036854775807 %int1_35 = torch.constant.int 1 %312 = torch.aten.slice.Tensor %297, %int0_32, %int0_33, %int9223372036854775807_34, %int1_35 : !torch.vtensor<[4,?],i1>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?],i1> torch.bind_symbolic_shape %312, [%292], affine_map<()[s0] -> (4, s0 * 32)> : !torch.vtensor<[4,?],i1> %int1_36 = torch.constant.int 1 %313 = torch.aten.unsqueeze %312, %int1_36 : !torch.vtensor<[4,?],i1>, !torch.int -> !torch.vtensor<[4,1,?],i1> torch.bind_symbolic_shape %313, [%292], affine_map<()[s0] -> (4, 1, s0 * 32)> : !torch.vtensor<[4,1,?],i1> %int2_37 = torch.constant.int 2 %314 = torch.aten.unsqueeze %313, %int2_37 : !torch.vtensor<[4,1,?],i1>, !torch.int -> !torch.vtensor<[4,1,1,?],i1> torch.bind_symbolic_shape %314, [%292], affine_map<()[s0] -> (4, 1, 1, s0 * 32)> : !torch.vtensor<[4,1,1,?],i1> %int3_38 = torch.constant.int 3 %int0_39 = torch.constant.int 0 %int9223372036854775807_40 = torch.constant.int 9223372036854775807 %int1_41 = torch.constant.int 1 %315 = torch.aten.slice.Tensor %314, %int3_38, %int0_39, %int9223372036854775807_40, %int1_41 : !torch.vtensor<[4,1,1,?],i1>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,1,1,?],i1> torch.bind_symbolic_shape %315, [%292], affine_map<()[s0] -> (4, 1, 1, s0 * 32)> : !torch.vtensor<[4,1,1,?],i1> %316 = torch.aten.logical_or %311, %315 : !torch.vtensor<[1,1,?,?],i1>, !torch.vtensor<[4,1,1,?],i1> -> !torch.vtensor<[4,1,?,?],i1> torch.bind_symbolic_shape %316, [%292], affine_map<()[s0] -> (4, 1, s0 * 32, s0 * 32)> : !torch.vtensor<[4,1,?,?],i1> %int0_42 = torch.constant.int 0 %int6 = torch.constant.int 6 %int0_43 = torch.constant.int 0 %cpu_44 = torch.constant.device "cpu" %none_45 = torch.constant.none %317 = torch.aten.scalar_tensor %int0_42, %int6, %int0_43, %cpu_44, %none_45 : !torch.int, !torch.int, !torch.int, !torch.Device, !torch.none -> !torch.vtensor<[],f32> %float-Inf = torch.constant.float 0xFFF0000000000000 %int6_46 = torch.constant.int 6 %int0_47 = torch.constant.int 0 %cpu_48 = torch.constant.device "cpu" %none_49 = torch.constant.none %318 = torch.aten.scalar_tensor %float-Inf, %int6_46, %int0_47, %cpu_48, %none_49 : !torch.float, !torch.int, !torch.int, !torch.Device, !torch.none -> !torch.vtensor<[],f32> %319 = torch.aten.where.self %316, %318, %317 : !torch.vtensor<[4,1,?,?],i1>, !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[4,1,?,?],f32> torch.bind_symbolic_shape %319, [%292], affine_map<()[s0] -> (4, 1, s0 * 32, s0 * 32)> : !torch.vtensor<[4,1,?,?],f32> %int5 = torch.constant.int 5 %320 = torch.prims.convert_element_type %319, %int5 : !torch.vtensor<[4,1,?,?],f32>, !torch.int -> !torch.vtensor<[4,1,?,?],f16> torch.bind_symbolic_shape %320, [%292], affine_map<()[s0] -> (4, 1, s0 * 32, s0 * 32)> : !torch.vtensor<[4,1,?,?],f16> %int-1_50 = torch.constant.int -1 %false_51 = torch.constant.bool false %false_52 = torch.constant.bool false %321 = torch.aten.embedding %0, %arg0, %int-1_50, %false_51, %false_52 : !torch.vtensor<[128256,4096],f16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %321, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> 
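// NOTE: %299..%320 assemble the attention bias: a 131072x131072 boolean
// upper-triangular mask (triu with diagonal 1) is built once, sliced down to
// the live [1,1,T,T] window, OR'd with the per-row mask from %297, and turned
// into an additive f16 bias of 0 / -inf via the two scalar tensors and
// `where`; %321 then embeds the token ids. In PyTorch terms (a sketch;
// `token_ids` and `embed_weight` are stand-ins):
//
//   import torch
//
//   T = 64
//   token_ids = torch.randint(0, 128256, (4, T))
//   embed_weight = torch.randn(128256, 4096, dtype=torch.float16)
//   lengths = torch.tensor([3, 10, 0, 64])
//
//   row_mask = torch.arange(T)[None, :] >= lengths[:, None]          # %297
//   causal = torch.ones(T, T, dtype=torch.bool).triu(1)[None, None]  # %302..%311
//   pad = row_mask[:, None, None, :]                                 # %315
//   bias = torch.where(causal | pad,
//                      torch.tensor(float("-inf")),
//                      torch.tensor(0.0)).to(torch.float16)          # %316..%320
//   h = torch.nn.functional.embedding(token_ids, embed_weight)       # %321
//
// Materializing the full 131072x131072 mask before slicing mirrors the IR
// literally; an eager implementation would normally build only the TxT tile.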
%int6_53 = torch.constant.int 6 %322 = torch.prims.convert_element_type %321, %int6_53 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %322, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_54 = torch.constant.int 2 %323 = torch.aten.pow.Tensor_Scalar %322, %int2_54 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %323, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_55 = torch.constant.int -1 %324 = torch.prim.ListConstruct %int-1_55 : (!torch.int) -> !torch.list<int> %true = torch.constant.bool true %none_56 = torch.constant.none %325 = torch.aten.mean.dim %323, %324, %true, %none_56 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %325, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06 = torch.constant.float 9.9999997473787516E-6 %int1_57 = torch.constant.int 1 %326 = torch.aten.add.Scalar %325, %float9.999990e-06, %int1_57 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %326, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %327 = torch.aten.rsqrt %326 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %327, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %328 = torch.aten.mul.Tensor %322, %327 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %328, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %329 = torch.aten.mul.Tensor %1, %328 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %329, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_58 = torch.constant.int 5 %330 = torch.prims.convert_element_type %329, %int5_58 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %330, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2 = torch.constant.int -2 %int-1_59 = torch.constant.int -1 %331 = torch.aten.transpose.int %2, %int-2, %int-1_59 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4 = torch.constant.int 4 %332 = torch.aten.mul.int %int4, %294 : !torch.int, !torch.int -> !torch.int %int4096 = torch.constant.int 4096 %333 = torch.prim.ListConstruct %332, %int4096 : (!torch.int, !torch.int) -> !torch.list<int> %334 = torch.aten.view %330, %333 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %334, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %335 = torch.aten.mm %334, %331 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %335, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_60 = torch.constant.int 4 %int4096_61 = torch.constant.int 4096 %336 = torch.prim.ListConstruct %int4_60, %294, %int4096_61 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %337 = torch.aten.view %335, %336 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> 
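// NOTE: %322..%330 are RMSNorm in f32 (square, mean over the hidden dim, add
// eps ~= 1e-5, rsqrt, scale by blk.0.attn_norm.weight, cast back to f16), and
// %331..%337 apply the q projection by flattening to [4*T, 4096], multiplying
// by the transposed weight, and reshaping back. Equivalent PyTorch (a sketch;
// the tensors are stand-ins, with `h` playing the embedding output above):
//
//   import torch
//
//   def rms_norm(x: torch.Tensor, w: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
//       xf = x.float()
//       xf = xf * torch.rsqrt(xf.pow(2).mean(-1, keepdim=True) + eps)  # %323..%328
//       return (w * xf).to(x.dtype)                                    # %329..%330
//
//   h = torch.randn(4, 64, 4096, dtype=torch.float16)
//   attn_norm_w = torch.ones(4096)
//   attn_q_w = torch.randn(4096, 4096, dtype=torch.float16)
//   q = rms_norm(h, attn_norm_w) @ attn_q_w.t()   # %331..%337: [4, T, 4096]
//
// The flatten/mm/view trio in the IR is this batched matmul spelled out; the
// k/v projections that follow repeat it with the 1024-wide weights.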
torch.bind_symbolic_shape %337, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_62 = torch.constant.int -2 %int-1_63 = torch.constant.int -1 %338 = torch.aten.transpose.int %3, %int-2_62, %int-1_63 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_64 = torch.constant.int 4 %339 = torch.aten.mul.int %int4_64, %294 : !torch.int, !torch.int -> !torch.int %int4096_65 = torch.constant.int 4096 %340 = torch.prim.ListConstruct %339, %int4096_65 : (!torch.int, !torch.int) -> !torch.list<int> %341 = torch.aten.view %330, %340 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %341, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %342 = torch.aten.mm %341, %338 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %342, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_66 = torch.constant.int 4 %int1024 = torch.constant.int 1024 %343 = torch.prim.ListConstruct %int4_66, %294, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %344 = torch.aten.view %342, %343 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %344, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_67 = torch.constant.int -2 %int-1_68 = torch.constant.int -1 %345 = torch.aten.transpose.int %4, %int-2_67, %int-1_68 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_69 = torch.constant.int 4 %346 = torch.aten.mul.int %int4_69, %294 : !torch.int, !torch.int -> !torch.int %int4096_70 = torch.constant.int 4096 %347 = torch.prim.ListConstruct %346, %int4096_70 : (!torch.int, !torch.int) -> !torch.list<int> %348 = torch.aten.view %330, %347 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %348, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %349 = torch.aten.mm %348, %345 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %349, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_71 = torch.constant.int 4 %int1024_72 = torch.constant.int 1024 %350 = torch.prim.ListConstruct %int4_71, %294, %int1024_72 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %351 = torch.aten.view %349, %350 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %351, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_73 = torch.constant.int 4 %int32 = torch.constant.int 32 %int128 = torch.constant.int 128 %352 = torch.prim.ListConstruct %int4_73, %294, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %353 = torch.aten.view %337, %352 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %353, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_74 = torch.constant.int 4 %int8 = torch.constant.int 8 %int128_75 = torch.constant.int 128 %354 = torch.prim.ListConstruct %int4_74, %294, %int8, %int128_75 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %355 = torch.aten.view %344, %354 : 
!torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %355, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_76 = torch.constant.int 4 %int8_77 = torch.constant.int 8 %int128_78 = torch.constant.int 128 %356 = torch.prim.ListConstruct %int4_76, %294, %int8_77, %int128_78 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %357 = torch.aten.view %351, %356 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %357, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_79 = torch.constant.int 131072 %none_80 = torch.constant.none %none_81 = torch.constant.none %cpu_82 = torch.constant.device "cpu" %false_83 = torch.constant.bool false %358 = torch.aten.arange %int131072_79, %none_80, %none_81, %cpu_82, %false_83 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_84 = torch.constant.int 0 %int128_85 = torch.constant.int 128 %int2_86 = torch.constant.int 2 %none_87 = torch.constant.none %none_88 = torch.constant.none %cpu_89 = torch.constant.device "cpu" %false_90 = torch.constant.bool false %359 = torch.aten.arange.start_step %int0_84, %int128_85, %int2_86, %none_87, %none_88, %cpu_89, %false_90 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_91 = torch.constant.int 0 %int0_92 = torch.constant.int 0 %int64 = torch.constant.int 64 %int1_93 = torch.constant.int 1 %360 = torch.aten.slice.Tensor %359, %int0_91, %int0_92, %int64, %int1_93 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_94 = torch.constant.int 6 %361 = torch.prims.convert_element_type %360, %int6_94 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_95 = torch.constant.int 128 %362 = torch.aten.div.Scalar %361, %int128_95 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05 = torch.constant.float 5.000000e+05 %363 = torch.aten.pow.Scalar %float5.000000e05, %362 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %364 = torch.aten.reciprocal %363 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00 = torch.constant.float 1.000000e+00 %365 = torch.aten.mul.Scalar %364, %float1.000000e00 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_96 = torch.constant.int 131072 %int1_97 = torch.constant.int 1 %366 = torch.prim.ListConstruct %int131072_96, %int1_97 : (!torch.int, !torch.int) -> !torch.list<int> %367 = torch.aten.view %358, %366 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %368 = torch.aten.mul.Tensor %367, %365 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %369 = torch.aten.cos %368 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %370 = torch.aten.sin %368 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %371 = torch.aten.complex %369, %370 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_98 = torch.constant.int 1 %372 = torch.aten.size.int %337, %int1_98 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_99 = torch.constant.int 0 %373 = torch.aten.add.int %int0_99, %372 : !torch.int, !torch.int 
-> !torch.int %int0_100 = torch.constant.int 0 %int0_101 = torch.constant.int 0 %int1_102 = torch.constant.int 1 %374 = torch.aten.slice.Tensor %371, %int0_100, %int0_101, %373, %int1_102 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %374, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_103 = torch.constant.int 1 %int0_104 = torch.constant.int 0 %int9223372036854775807_105 = torch.constant.int 9223372036854775807 %int1_106 = torch.constant.int 1 %375 = torch.aten.slice.Tensor %374, %int1_103, %int0_104, %int9223372036854775807_105, %int1_106 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %375, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_107 = torch.constant.int 0 %376 = torch.aten.unsqueeze %375, %int0_107 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %376, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_108 = torch.constant.int 2 %377 = torch.aten.unsqueeze %376, %int2_108 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %377, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_109 = torch.constant.int 3 %int0_110 = torch.constant.int 0 %int9223372036854775807_111 = torch.constant.int 9223372036854775807 %int1_112 = torch.constant.int 1 %378 = torch.aten.slice.Tensor %377, %int3_109, %int0_110, %int9223372036854775807_111, %int1_112 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %378, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %379 = torch_c.to_builtin_tensor %353 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1 = arith.constant 1 : index %dim = tensor.dim %379, %c1 : tensor<4x?x32x128xf16> %380 = flow.tensor.bitcast %379 : tensor<4x?x32x128xf16>{%dim} -> tensor<4x?x32x64xcomplex<f16>>{%dim} %381 = torch_c.from_builtin_tensor %380 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %381, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %382 = torch.aten.mul.Tensor %381, %378 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %382, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %383 = torch_c.to_builtin_tensor %382 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_113 = arith.constant 1 : index %dim_114 = tensor.dim %383, %c1_113 : tensor<4x?x32x64xcomplex<f32>> %384 = flow.tensor.bitcast %383 : tensor<4x?x32x64xcomplex<f32>>{%dim_114} -> tensor<4x?x32x128xf32>{%dim_114} %385 = torch_c.from_builtin_tensor %384 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %385, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_115 = torch.constant.int 5 %386 = torch.prims.convert_element_type %385, %int5_115 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> 
!torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %386, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_116 = torch.constant.int 131072 %none_117 = torch.constant.none %none_118 = torch.constant.none %cpu_119 = torch.constant.device "cpu" %false_120 = torch.constant.bool false %387 = torch.aten.arange %int131072_116, %none_117, %none_118, %cpu_119, %false_120 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_121 = torch.constant.int 0 %int128_122 = torch.constant.int 128 %int2_123 = torch.constant.int 2 %none_124 = torch.constant.none %none_125 = torch.constant.none %cpu_126 = torch.constant.device "cpu" %false_127 = torch.constant.bool false %388 = torch.aten.arange.start_step %int0_121, %int128_122, %int2_123, %none_124, %none_125, %cpu_126, %false_127 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_128 = torch.constant.int 0 %int0_129 = torch.constant.int 0 %int64_130 = torch.constant.int 64 %int1_131 = torch.constant.int 1 %389 = torch.aten.slice.Tensor %388, %int0_128, %int0_129, %int64_130, %int1_131 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_132 = torch.constant.int 6 %390 = torch.prims.convert_element_type %389, %int6_132 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_133 = torch.constant.int 128 %391 = torch.aten.div.Scalar %390, %int128_133 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_134 = torch.constant.float 5.000000e+05 %392 = torch.aten.pow.Scalar %float5.000000e05_134, %391 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %393 = torch.aten.reciprocal %392 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_135 = torch.constant.float 1.000000e+00 %394 = torch.aten.mul.Scalar %393, %float1.000000e00_135 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_136 = torch.constant.int 131072 %int1_137 = torch.constant.int 1 %395 = torch.prim.ListConstruct %int131072_136, %int1_137 : (!torch.int, !torch.int) -> !torch.list<int> %396 = torch.aten.view %387, %395 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %397 = torch.aten.mul.Tensor %396, %394 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %398 = torch.aten.cos %397 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %399 = torch.aten.sin %397 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %400 = torch.aten.complex %398, %399 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_138 = torch.constant.int 1 %401 = torch.aten.size.int %344, %int1_138 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_139 = torch.constant.int 0 %402 = torch.aten.add.int %int0_139, %401 : !torch.int, !torch.int -> !torch.int %int0_140 = torch.constant.int 0 %int0_141 = torch.constant.int 0 %int1_142 = torch.constant.int 1 %403 = torch.aten.slice.Tensor %400, %int0_140, %int0_141, %402, %int1_142 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %403, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_143 = 
torch.constant.int 1 %int0_144 = torch.constant.int 0 %int9223372036854775807_145 = torch.constant.int 9223372036854775807 %int1_146 = torch.constant.int 1 %404 = torch.aten.slice.Tensor %403, %int1_143, %int0_144, %int9223372036854775807_145, %int1_146 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %404, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_147 = torch.constant.int 0 %405 = torch.aten.unsqueeze %404, %int0_147 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %405, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_148 = torch.constant.int 2 %406 = torch.aten.unsqueeze %405, %int2_148 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %406, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_149 = torch.constant.int 3 %int0_150 = torch.constant.int 0 %int9223372036854775807_151 = torch.constant.int 9223372036854775807 %int1_152 = torch.constant.int 1 %407 = torch.aten.slice.Tensor %406, %int3_149, %int0_150, %int9223372036854775807_151, %int1_152 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %407, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %408 = torch_c.to_builtin_tensor %355 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_153 = arith.constant 1 : index %dim_154 = tensor.dim %408, %c1_153 : tensor<4x?x8x128xf16> %409 = flow.tensor.bitcast %408 : tensor<4x?x8x128xf16>{%dim_154} -> tensor<4x?x8x64xcomplex<f16>>{%dim_154} %410 = torch_c.from_builtin_tensor %409 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %410, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %411 = torch.aten.mul.Tensor %410, %407 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %411, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %412 = torch_c.to_builtin_tensor %411 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_155 = arith.constant 1 : index %dim_156 = tensor.dim %412, %c1_155 : tensor<4x?x8x64xcomplex<f32>> %413 = flow.tensor.bitcast %412 : tensor<4x?x8x64xcomplex<f32>>{%dim_156} -> tensor<4x?x8x128xf32>{%dim_156} %414 = torch_c.from_builtin_tensor %413 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %414, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_157 = torch.constant.int 5 %415 = torch.prims.convert_element_type %414, %int5_157 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %415, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int0_158 = torch.constant.int 0 %416 = torch.aten.size.int %291, %int0_158 : !torch.vtensor<[?,2097152],f16>, !torch.int -> !torch.int %int32_159 = torch.constant.int 32 %int2_160 = torch.constant.int 2 %int32_161 = torch.constant.int 32 %int8_162 = torch.constant.int 8 %int128_163 = 
torch.constant.int 128 %417 = torch.prim.ListConstruct %416, %int32_159, %int2_160, %int32_161, %int8_162, %int128_163 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %418 = torch.aten.view %291, %417 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %418, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_164 = torch.constant.int 32 %419 = torch.aten.mul.int %416, %int32_164 : !torch.int, !torch.int -> !torch.int %int2_165 = torch.constant.int 2 %420 = torch.aten.mul.int %419, %int2_165 : !torch.int, !torch.int -> !torch.int %int32_166 = torch.constant.int 32 %int8_167 = torch.constant.int 8 %int128_168 = torch.constant.int 128 %421 = torch.prim.ListConstruct %420, %int32_166, %int8_167, %int128_168 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %422 = torch.aten.view %418, %421 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %422, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int64_169 = torch.constant.int 64 %423 = torch.aten.mul.Scalar %arg2, %int64_169 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %423, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int0_170 = torch.constant.int 0 %int1_171 = torch.constant.int 1 %424 = torch.aten.add.Scalar %423, %int0_170, %int1_171 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %424, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int1_172 = torch.constant.int 1 %425 = torch.aten.size.int %arg2, %int1_172 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.int %int4_173 = torch.constant.int 4 %int32_174 = torch.constant.int 32 %int8_175 = torch.constant.int 8 %int128_176 = torch.constant.int 128 %426 = torch.prim.ListConstruct %int4_173, %425, %int32_174, %int8_175, %int128_176 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %427 = torch.aten.view %415, %426 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %427, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_177 = torch.constant.int 4 %428 = torch.aten.mul.int %int4_177, %425 : !torch.int, !torch.int -> !torch.int %int32_178 = torch.constant.int 32 %int8_179 = torch.constant.int 8 %int128_180 = torch.constant.int 128 %429 = torch.prim.ListConstruct %428, %int32_178, %int8_179, %int128_180 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %430 = torch.aten.view %427, %429 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %430, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_181 = torch.constant.int 4 %431 = torch.aten.mul.int %int4_181, %425 : !torch.int, !torch.int -> !torch.int %432 = torch.prim.ListConstruct %431 : (!torch.int) -> !torch.list<int> %433 = torch.aten.view %424, %432 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %433, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %434 = torch.prim.ListConstruct %433 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_182 = 
torch.constant.bool false %435 = torch.aten.index_put %422, %434, %430, %false_182 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %435, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_183 = torch.constant.int 32 %int2_184 = torch.constant.int 2 %int32_185 = torch.constant.int 32 %int8_186 = torch.constant.int 8 %int128_187 = torch.constant.int 128 %436 = torch.prim.ListConstruct %416, %int32_183, %int2_184, %int32_185, %int8_186, %int128_187 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %437 = torch.aten.view %435, %436 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %437, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152 = torch.constant.int 2097152 %438 = torch.prim.ListConstruct %416, %int2097152 : (!torch.int, !torch.int) -> !torch.list<int> %439 = torch.aten.view %437, %438 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %439, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_188 = torch.constant.int 32 %int2_189 = torch.constant.int 2 %int32_190 = torch.constant.int 32 %int8_191 = torch.constant.int 8 %int128_192 = torch.constant.int 128 %440 = torch.prim.ListConstruct %416, %int32_188, %int2_189, %int32_190, %int8_191, %int128_192 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %441 = torch.aten.view %439, %440 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %441, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_193 = torch.constant.int 32 %int8_194 = torch.constant.int 8 %int128_195 = torch.constant.int 128 %442 = torch.prim.ListConstruct %420, %int32_193, %int8_194, %int128_195 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %443 = torch.aten.view %441, %442 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %443, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_196 = torch.constant.int 4 %int32_197 = torch.constant.int 32 %int8_198 = torch.constant.int 8 %int128_199 = torch.constant.int 128 %444 = torch.prim.ListConstruct %int4_196, %425, %int32_197, %int8_198, %int128_199 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %445 = torch.aten.view %357, %444 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %445, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_200 = torch.constant.int 4 %446 = torch.aten.mul.int %int4_200, %425 : !torch.int, !torch.int -> !torch.int %int32_201 = torch.constant.int 32 %int8_202 = torch.constant.int 8 %int128_203 = torch.constant.int 128 %447 = torch.prim.ListConstruct %446, %int32_201, %int8_202, %int128_203 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %448 = torch.aten.view %445, %447 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %448, [%292], affine_map<()[s0] -> 
(s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_204 = torch.constant.int 1 %int1_205 = torch.constant.int 1 %449 = torch.aten.add.Scalar %424, %int1_204, %int1_205 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %449, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_206 = torch.constant.int 4 %450 = torch.aten.mul.int %int4_206, %425 : !torch.int, !torch.int -> !torch.int %451 = torch.prim.ListConstruct %450 : (!torch.int) -> !torch.list<int> %452 = torch.aten.view %449, %451 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %452, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %453 = torch.prim.ListConstruct %452 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_207 = torch.constant.bool false %454 = torch.aten.index_put %443, %453, %448, %false_207 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %454, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_208 = torch.constant.int 32 %int2_209 = torch.constant.int 2 %int32_210 = torch.constant.int 32 %int8_211 = torch.constant.int 8 %int128_212 = torch.constant.int 128 %455 = torch.prim.ListConstruct %416, %int32_208, %int2_209, %int32_210, %int8_211, %int128_212 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %456 = torch.aten.view %454, %455 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %456, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_213 = torch.constant.int 2097152 %457 = torch.prim.ListConstruct %416, %int2097152_213 : (!torch.int, !torch.int) -> !torch.list<int> %458 = torch.aten.view %456, %457 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %458, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_214 = torch.constant.int -2 %459 = torch.aten.unsqueeze %415, %int-2_214 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %459, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_215 = torch.constant.int 4 %int8_216 = torch.constant.int 8 %int4_217 = torch.constant.int 4 %int128_218 = torch.constant.int 128 %460 = torch.prim.ListConstruct %int4_215, %401, %int8_216, %int4_217, %int128_218 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_219 = torch.constant.bool false %461 = torch.aten.expand %459, %460, %false_219 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %461, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_220 = torch.constant.int 0 %462 = torch.aten.clone %461, %int0_220 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %462, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_221 = torch.constant.int 4 %int32_222 = torch.constant.int 32 %int128_223 = torch.constant.int 128 %463 = torch.prim.ListConstruct %int4_221, %401, 
%int32_222, %int128_223 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %464 = torch.aten._unsafe_view %462, %463 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %464, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_224 = torch.constant.int -2 %465 = torch.aten.unsqueeze %357, %int-2_224 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %465, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_225 = torch.constant.int 1 %466 = torch.aten.size.int %351, %int1_225 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_226 = torch.constant.int 4 %int8_227 = torch.constant.int 8 %int4_228 = torch.constant.int 4 %int128_229 = torch.constant.int 128 %467 = torch.prim.ListConstruct %int4_226, %466, %int8_227, %int4_228, %int128_229 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_230 = torch.constant.bool false %468 = torch.aten.expand %465, %467, %false_230 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %468, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_231 = torch.constant.int 0 %469 = torch.aten.clone %468, %int0_231 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %469, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_232 = torch.constant.int 4 %int32_233 = torch.constant.int 32 %int128_234 = torch.constant.int 128 %470 = torch.prim.ListConstruct %int4_232, %466, %int32_233, %int128_234 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %471 = torch.aten._unsafe_view %469, %470 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %471, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_235 = torch.constant.int 1 %int2_236 = torch.constant.int 2 %472 = torch.aten.transpose.int %386, %int1_235, %int2_236 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %472, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_237 = torch.constant.int 1 %int2_238 = torch.constant.int 2 %473 = torch.aten.transpose.int %464, %int1_237, %int2_238 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %473, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_239 = torch.constant.int 1 %int2_240 = torch.constant.int 2 %474 = torch.aten.transpose.int %471, %int1_239, %int2_240 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %474, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00 = torch.constant.float 0.000000e+00 %false_241 = torch.constant.bool false %none_242 = torch.constant.none %475:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%472, %473, %474, %float0.000000e00, %false_241, %320, %none_242) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, 
!torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %475#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_243 = torch.constant.int 1 %int2_244 = torch.constant.int 2 %476 = torch.aten.transpose.int %475#0, %int1_243, %int2_244 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %476, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_245 = torch.constant.int 4 %int4096_246 = torch.constant.int 4096 %477 = torch.prim.ListConstruct %int4_245, %372, %int4096_246 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %478 = torch.aten.view %476, %477 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %478, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_247 = torch.constant.int -2 %int-1_248 = torch.constant.int -1 %479 = torch.aten.transpose.int %5, %int-2_247, %int-1_248 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_249 = torch.constant.int 4 %480 = torch.aten.mul.int %int4_249, %372 : !torch.int, !torch.int -> !torch.int %int4096_250 = torch.constant.int 4096 %481 = torch.prim.ListConstruct %480, %int4096_250 : (!torch.int, !torch.int) -> !torch.list<int> %482 = torch.aten.view %478, %481 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %482, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %483 = torch.aten.mm %482, %479 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %483, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_251 = torch.constant.int 4 %int4096_252 = torch.constant.int 4096 %484 = torch.prim.ListConstruct %int4_251, %372, %int4096_252 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %485 = torch.aten.view %483, %484 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %485, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_253 = torch.constant.int 1 %486 = torch.aten.add.Tensor %321, %485, %int1_253 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %486, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_254 = torch.constant.int 6 %487 = torch.prims.convert_element_type %486, %int6_254 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %487, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_255 = torch.constant.int 2 %488 = torch.aten.pow.Tensor_Scalar %487, %int2_255 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %488, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_256 = torch.constant.int -1 %489 = torch.prim.ListConstruct %int-1_256 : (!torch.int) -> !torch.list<int> %true_257 = torch.constant.bool true %none_258 = torch.constant.none %490 = torch.aten.mean.dim %488, %489, %true_257, %none_258 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, 
!torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %490, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_259 = torch.constant.float 9.9999997473787516E-6 %int1_260 = torch.constant.int 1 %491 = torch.aten.add.Scalar %490, %float9.999990e-06_259, %int1_260 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %491, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %492 = torch.aten.rsqrt %491 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %492, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %493 = torch.aten.mul.Tensor %487, %492 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %493, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %494 = torch.aten.mul.Tensor %6, %493 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %494, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_261 = torch.constant.int 5 %495 = torch.prims.convert_element_type %494, %int5_261 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %495, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_262 = torch.constant.int -2 %int-1_263 = torch.constant.int -1 %496 = torch.aten.transpose.int %7, %int-2_262, %int-1_263 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_264 = torch.constant.int 4 %497 = torch.aten.mul.int %int4_264, %294 : !torch.int, !torch.int -> !torch.int %int4096_265 = torch.constant.int 4096 %498 = torch.prim.ListConstruct %497, %int4096_265 : (!torch.int, !torch.int) -> !torch.list<int> %499 = torch.aten.view %495, %498 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %499, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %500 = torch.aten.mm %499, %496 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %500, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_266 = torch.constant.int 4 %int14336 = torch.constant.int 14336 %501 = torch.prim.ListConstruct %int4_266, %294, %int14336 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %502 = torch.aten.view %500, %501 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %502, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %503 = torch.aten.silu %502 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %503, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_267 = torch.constant.int -2 %int-1_268 = torch.constant.int -1 %504 = torch.aten.transpose.int %8, %int-2_267, %int-1_268 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_269 = torch.constant.int 4 %505 = torch.aten.mul.int %int4_269, %294 : !torch.int, !torch.int -> !torch.int %int4096_270 = torch.constant.int 4096 %506 = torch.prim.ListConstruct %505, %int4096_270 : (!torch.int, !torch.int) -> 
!torch.list<int> %507 = torch.aten.view %495, %506 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %507, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %508 = torch.aten.mm %507, %504 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %508, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_271 = torch.constant.int 4 %int14336_272 = torch.constant.int 14336 %509 = torch.prim.ListConstruct %int4_271, %294, %int14336_272 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %510 = torch.aten.view %508, %509 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %510, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %511 = torch.aten.mul.Tensor %503, %510 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %511, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_273 = torch.constant.int -2 %int-1_274 = torch.constant.int -1 %512 = torch.aten.transpose.int %9, %int-2_273, %int-1_274 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_275 = torch.constant.int 1 %513 = torch.aten.size.int %502, %int1_275 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_276 = torch.constant.int 4 %514 = torch.aten.mul.int %int4_276, %513 : !torch.int, !torch.int -> !torch.int %int14336_277 = torch.constant.int 14336 %515 = torch.prim.ListConstruct %514, %int14336_277 : (!torch.int, !torch.int) -> !torch.list<int> %516 = torch.aten.view %511, %515 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %516, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %517 = torch.aten.mm %516, %512 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %517, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_278 = torch.constant.int 4 %int4096_279 = torch.constant.int 4096 %518 = torch.prim.ListConstruct %int4_278, %513, %int4096_279 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %519 = torch.aten.view %517, %518 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %519, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_280 = torch.constant.int 1 %520 = torch.aten.add.Tensor %486, %519, %int1_280 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %520, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_281 = torch.constant.int 6 %521 = torch.prims.convert_element_type %520, %int6_281 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %521, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_282 = torch.constant.int 2 %522 = torch.aten.pow.Tensor_Scalar %521, %int2_282 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %522, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> 
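// Inferred: everything from %322 through %520 is one complete transformer block (layer 0),
// and the ops below repeat the same schedule for layer 1 with the next weight set (%10..%13).
// Recoverable structure, reading the ops above:
//   * RoPE: inv_freq[j] = 1 / 500000^(2j/128) for j in [0,64); angle[p,j] = p * inv_freq[j];
//     cos/sin are packed into complex<f32>, and Q/K are rotated by flow.tensor.bitcast-ing
//     each [...,128] f16 head into [...,64] complex<f16> and multiplying (%381*%378, %410*%407).
//   * Paged KV cache: %291 is [pages, 2097152] f16, viewed as [pages, 32, 2, 32, 8, 128]
//     (2097152 = 32*2*32*8*128). The flat write index is page_id*64 + 2*layer + kv, which
//     matches the indices built here: %424 = %arg2*64 + 0 (layer-0 K), %449 = +1 (layer-0 V),
//     and below %616 = +2 / %646 = +3 for layer 1; each page slot holds 32 tokens.
//   * GQA: the 8 KV heads are broadcast 4x (unsqueeze / expand / clone / _unsafe_view) to
//     match the 32 query heads before torch.aten._scaled_dot_product_flash_attention_for_cpu,
//     which runs with dropout 0.0, is_causal = false, and the attention mask %320 built earlier.
//   * Residuals and FFN: %486 = block input + attn_out; the FFN is the SwiGLU form
//     down(silu(gate(x)) * up(x)) using the [14336,4096] gate/up weights (%7, %8) and the
//     [4096,14336] down weight (%9), and %520 = %486 + ffn_out is the layer output.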
%int-1_283 = torch.constant.int -1 %523 = torch.prim.ListConstruct %int-1_283 : (!torch.int) -> !torch.list<int> %true_284 = torch.constant.bool true %none_285 = torch.constant.none %524 = torch.aten.mean.dim %522, %523, %true_284, %none_285 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %524, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_286 = torch.constant.float 9.9999997473787516E-6 %int1_287 = torch.constant.int 1 %525 = torch.aten.add.Scalar %524, %float9.999990e-06_286, %int1_287 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %525, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %526 = torch.aten.rsqrt %525 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %526, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %527 = torch.aten.mul.Tensor %521, %526 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %527, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %528 = torch.aten.mul.Tensor %10, %527 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %528, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_288 = torch.constant.int 5 %529 = torch.prims.convert_element_type %528, %int5_288 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %529, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_289 = torch.constant.int -2 %int-1_290 = torch.constant.int -1 %530 = torch.aten.transpose.int %11, %int-2_289, %int-1_290 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_291 = torch.constant.int 4 %531 = torch.aten.mul.int %int4_291, %294 : !torch.int, !torch.int -> !torch.int %int4096_292 = torch.constant.int 4096 %532 = torch.prim.ListConstruct %531, %int4096_292 : (!torch.int, !torch.int) -> !torch.list<int> %533 = torch.aten.view %529, %532 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %533, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %534 = torch.aten.mm %533, %530 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %534, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_293 = torch.constant.int 4 %int4096_294 = torch.constant.int 4096 %535 = torch.prim.ListConstruct %int4_293, %294, %int4096_294 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %536 = torch.aten.view %534, %535 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %536, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_295 = torch.constant.int -2 %int-1_296 = torch.constant.int -1 %537 = torch.aten.transpose.int %12, %int-2_295, %int-1_296 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_297 = torch.constant.int 4 %538 = torch.aten.mul.int %int4_297, %294 : !torch.int, !torch.int -> !torch.int %int4096_298 = torch.constant.int 4096 %539 = 
torch.prim.ListConstruct %538, %int4096_298 : (!torch.int, !torch.int) -> !torch.list<int> %540 = torch.aten.view %529, %539 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %540, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %541 = torch.aten.mm %540, %537 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %541, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_299 = torch.constant.int 4 %int1024_300 = torch.constant.int 1024 %542 = torch.prim.ListConstruct %int4_299, %294, %int1024_300 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %543 = torch.aten.view %541, %542 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %543, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_301 = torch.constant.int -2 %int-1_302 = torch.constant.int -1 %544 = torch.aten.transpose.int %13, %int-2_301, %int-1_302 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_303 = torch.constant.int 4 %545 = torch.aten.mul.int %int4_303, %294 : !torch.int, !torch.int -> !torch.int %int4096_304 = torch.constant.int 4096 %546 = torch.prim.ListConstruct %545, %int4096_304 : (!torch.int, !torch.int) -> !torch.list<int> %547 = torch.aten.view %529, %546 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %547, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %548 = torch.aten.mm %547, %544 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %548, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_305 = torch.constant.int 4 %int1024_306 = torch.constant.int 1024 %549 = torch.prim.ListConstruct %int4_305, %294, %int1024_306 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %550 = torch.aten.view %548, %549 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %550, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_307 = torch.constant.int 4 %int32_308 = torch.constant.int 32 %int128_309 = torch.constant.int 128 %551 = torch.prim.ListConstruct %int4_307, %294, %int32_308, %int128_309 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %552 = torch.aten.view %536, %551 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %552, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_310 = torch.constant.int 4 %int8_311 = torch.constant.int 8 %int128_312 = torch.constant.int 128 %553 = torch.prim.ListConstruct %int4_310, %294, %int8_311, %int128_312 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %554 = torch.aten.view %543, %553 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %554, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_313 = torch.constant.int 4 %int8_314 = torch.constant.int 8 %int128_315 = torch.constant.int 128 %555 = torch.prim.ListConstruct %int4_313, %294, %int8_314, %int128_315 : (!torch.int, !torch.int, !torch.int, !torch.int) -> 
!torch.list<int> %556 = torch.aten.view %550, %555 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %556, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_316 = torch.constant.int 131072 %none_317 = torch.constant.none %none_318 = torch.constant.none %cpu_319 = torch.constant.device "cpu" %false_320 = torch.constant.bool false %557 = torch.aten.arange %int131072_316, %none_317, %none_318, %cpu_319, %false_320 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_321 = torch.constant.int 0 %int128_322 = torch.constant.int 128 %int2_323 = torch.constant.int 2 %none_324 = torch.constant.none %none_325 = torch.constant.none %cpu_326 = torch.constant.device "cpu" %false_327 = torch.constant.bool false %558 = torch.aten.arange.start_step %int0_321, %int128_322, %int2_323, %none_324, %none_325, %cpu_326, %false_327 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_328 = torch.constant.int 0 %int0_329 = torch.constant.int 0 %int64_330 = torch.constant.int 64 %int1_331 = torch.constant.int 1 %559 = torch.aten.slice.Tensor %558, %int0_328, %int0_329, %int64_330, %int1_331 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_332 = torch.constant.int 6 %560 = torch.prims.convert_element_type %559, %int6_332 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_333 = torch.constant.int 128 %561 = torch.aten.div.Scalar %560, %int128_333 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_334 = torch.constant.float 5.000000e+05 %562 = torch.aten.pow.Scalar %float5.000000e05_334, %561 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %563 = torch.aten.reciprocal %562 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_335 = torch.constant.float 1.000000e+00 %564 = torch.aten.mul.Scalar %563, %float1.000000e00_335 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_336 = torch.constant.int 131072 %int1_337 = torch.constant.int 1 %565 = torch.prim.ListConstruct %int131072_336, %int1_337 : (!torch.int, !torch.int) -> !torch.list<int> %566 = torch.aten.view %557, %565 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %567 = torch.aten.mul.Tensor %566, %564 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %568 = torch.aten.cos %567 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %569 = torch.aten.sin %567 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %570 = torch.aten.complex %568, %569 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_338 = torch.constant.int 1 %571 = torch.aten.size.int %536, %int1_338 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_339 = torch.constant.int 0 %572 = torch.aten.add.int %int0_339, %571 : !torch.int, !torch.int -> !torch.int %int0_340 = torch.constant.int 0 %int0_341 = torch.constant.int 0 %int1_342 = torch.constant.int 1 %573 = torch.aten.slice.Tensor %570, %int0_340, %int0_341, %572, %int1_342 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %573, [%292], 
affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_343 = torch.constant.int 1 %int0_344 = torch.constant.int 0 %int9223372036854775807_345 = torch.constant.int 9223372036854775807 %int1_346 = torch.constant.int 1 %574 = torch.aten.slice.Tensor %573, %int1_343, %int0_344, %int9223372036854775807_345, %int1_346 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %574, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_347 = torch.constant.int 0 %575 = torch.aten.unsqueeze %574, %int0_347 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %575, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_348 = torch.constant.int 2 %576 = torch.aten.unsqueeze %575, %int2_348 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %576, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_349 = torch.constant.int 3 %int0_350 = torch.constant.int 0 %int9223372036854775807_351 = torch.constant.int 9223372036854775807 %int1_352 = torch.constant.int 1 %577 = torch.aten.slice.Tensor %576, %int3_349, %int0_350, %int9223372036854775807_351, %int1_352 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %577, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %578 = torch_c.to_builtin_tensor %552 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_353 = arith.constant 1 : index %dim_354 = tensor.dim %578, %c1_353 : tensor<4x?x32x128xf16> %579 = flow.tensor.bitcast %578 : tensor<4x?x32x128xf16>{%dim_354} -> tensor<4x?x32x64xcomplex<f16>>{%dim_354} %580 = torch_c.from_builtin_tensor %579 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %580, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %581 = torch.aten.mul.Tensor %580, %577 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %581, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %582 = torch_c.to_builtin_tensor %581 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_355 = arith.constant 1 : index %dim_356 = tensor.dim %582, %c1_355 : tensor<4x?x32x64xcomplex<f32>> %583 = flow.tensor.bitcast %582 : tensor<4x?x32x64xcomplex<f32>>{%dim_356} -> tensor<4x?x32x128xf32>{%dim_356} %584 = torch_c.from_builtin_tensor %583 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %584, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_357 = torch.constant.int 5 %585 = torch.prims.convert_element_type %584, %int5_357 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %585, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_358 = torch.constant.int 131072 %none_359 = torch.constant.none %none_360 = torch.constant.none %cpu_361 = torch.constant.device "cpu" %false_362 = torch.constant.bool false 
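// Note (inferred): the full position/frequency table -- arange(131072), base-500000 inverse
// frequencies, cos/sin, complex packing -- is rebuilt verbatim for every rotary application
// rather than hoisted and shared; the construction starting at %586 below is the fourth
// identical copy (after layer-0 Q/K and layer-1 Q above). 131072 is the maximum context
// length baked into the table; only the first s0*32 rows are sliced out and used.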
%586 = torch.aten.arange %int131072_358, %none_359, %none_360, %cpu_361, %false_362 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_363 = torch.constant.int 0 %int128_364 = torch.constant.int 128 %int2_365 = torch.constant.int 2 %none_366 = torch.constant.none %none_367 = torch.constant.none %cpu_368 = torch.constant.device "cpu" %false_369 = torch.constant.bool false %587 = torch.aten.arange.start_step %int0_363, %int128_364, %int2_365, %none_366, %none_367, %cpu_368, %false_369 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_370 = torch.constant.int 0 %int0_371 = torch.constant.int 0 %int64_372 = torch.constant.int 64 %int1_373 = torch.constant.int 1 %588 = torch.aten.slice.Tensor %587, %int0_370, %int0_371, %int64_372, %int1_373 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_374 = torch.constant.int 6 %589 = torch.prims.convert_element_type %588, %int6_374 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_375 = torch.constant.int 128 %590 = torch.aten.div.Scalar %589, %int128_375 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_376 = torch.constant.float 5.000000e+05 %591 = torch.aten.pow.Scalar %float5.000000e05_376, %590 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %592 = torch.aten.reciprocal %591 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_377 = torch.constant.float 1.000000e+00 %593 = torch.aten.mul.Scalar %592, %float1.000000e00_377 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_378 = torch.constant.int 131072 %int1_379 = torch.constant.int 1 %594 = torch.prim.ListConstruct %int131072_378, %int1_379 : (!torch.int, !torch.int) -> !torch.list<int> %595 = torch.aten.view %586, %594 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %596 = torch.aten.mul.Tensor %595, %593 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %597 = torch.aten.cos %596 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %598 = torch.aten.sin %596 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %599 = torch.aten.complex %597, %598 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_380 = torch.constant.int 1 %600 = torch.aten.size.int %543, %int1_380 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_381 = torch.constant.int 0 %601 = torch.aten.add.int %int0_381, %600 : !torch.int, !torch.int -> !torch.int %int0_382 = torch.constant.int 0 %int0_383 = torch.constant.int 0 %int1_384 = torch.constant.int 1 %602 = torch.aten.slice.Tensor %599, %int0_382, %int0_383, %601, %int1_384 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %602, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_385 = torch.constant.int 1 %int0_386 = torch.constant.int 0 %int9223372036854775807_387 = torch.constant.int 9223372036854775807 %int1_388 = torch.constant.int 1 %603 = torch.aten.slice.Tensor %602, %int1_385, %int0_386, %int9223372036854775807_387, %int1_388 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> 
!torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %603, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_389 = torch.constant.int 0 %604 = torch.aten.unsqueeze %603, %int0_389 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %604, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_390 = torch.constant.int 2 %605 = torch.aten.unsqueeze %604, %int2_390 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %605, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_391 = torch.constant.int 3 %int0_392 = torch.constant.int 0 %int9223372036854775807_393 = torch.constant.int 9223372036854775807 %int1_394 = torch.constant.int 1 %606 = torch.aten.slice.Tensor %605, %int3_391, %int0_392, %int9223372036854775807_393, %int1_394 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %606, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %607 = torch_c.to_builtin_tensor %554 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_395 = arith.constant 1 : index %dim_396 = tensor.dim %607, %c1_395 : tensor<4x?x8x128xf16> %608 = flow.tensor.bitcast %607 : tensor<4x?x8x128xf16>{%dim_396} -> tensor<4x?x8x64xcomplex<f16>>{%dim_396} %609 = torch_c.from_builtin_tensor %608 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %609, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %610 = torch.aten.mul.Tensor %609, %606 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %610, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %611 = torch_c.to_builtin_tensor %610 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_397 = arith.constant 1 : index %dim_398 = tensor.dim %611, %c1_397 : tensor<4x?x8x64xcomplex<f32>> %612 = flow.tensor.bitcast %611 : tensor<4x?x8x64xcomplex<f32>>{%dim_398} -> tensor<4x?x8x128xf32>{%dim_398} %613 = torch_c.from_builtin_tensor %612 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %613, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_399 = torch.constant.int 5 %614 = torch.prims.convert_element_type %613, %int5_399 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %614, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_400 = torch.constant.int 64 %615 = torch.aten.mul.Scalar %arg2, %int64_400 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %615, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int2_401 = torch.constant.int 2 %int1_402 = torch.constant.int 1 %616 = torch.aten.add.Scalar %615, %int2_401, %int1_402 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %616, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_403 = torch.constant.int 4 %int32_404 = torch.constant.int 32 %int8_405 = 
torch.constant.int 8 %int128_406 = torch.constant.int 128 %617 = torch.prim.ListConstruct %int4_403, %425, %int32_404, %int8_405, %int128_406 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %618 = torch.aten.view %614, %617 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %618, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_407 = torch.constant.int 4 %619 = torch.aten.mul.int %int4_407, %425 : !torch.int, !torch.int -> !torch.int %int32_408 = torch.constant.int 32 %int8_409 = torch.constant.int 8 %int128_410 = torch.constant.int 128 %620 = torch.prim.ListConstruct %619, %int32_408, %int8_409, %int128_410 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %621 = torch.aten.view %618, %620 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %621, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_411 = torch.constant.int 4 %622 = torch.aten.mul.int %int4_411, %425 : !torch.int, !torch.int -> !torch.int %623 = torch.prim.ListConstruct %622 : (!torch.int) -> !torch.list<int> %624 = torch.aten.view %616, %623 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %624, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_412 = torch.constant.int 32 %int2_413 = torch.constant.int 2 %int32_414 = torch.constant.int 32 %int8_415 = torch.constant.int 8 %int128_416 = torch.constant.int 128 %625 = torch.prim.ListConstruct %416, %int32_412, %int2_413, %int32_414, %int8_415, %int128_416 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %626 = torch.aten.view %458, %625 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %626, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_417 = torch.constant.int 32 %627 = torch.aten.mul.int %416, %int32_417 : !torch.int, !torch.int -> !torch.int %int2_418 = torch.constant.int 2 %628 = torch.aten.mul.int %627, %int2_418 : !torch.int, !torch.int -> !torch.int %int32_419 = torch.constant.int 32 %int8_420 = torch.constant.int 8 %int128_421 = torch.constant.int 128 %629 = torch.prim.ListConstruct %628, %int32_419, %int8_420, %int128_421 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %630 = torch.aten.view %626, %629 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %630, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %631 = torch.prim.ListConstruct %624 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_422 = torch.constant.bool false %632 = torch.aten.index_put %630, %631, %621, %false_422 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %632, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_423 = torch.constant.int 32 %int2_424 = torch.constant.int 2 %int32_425 = torch.constant.int 32 %int8_426 = torch.constant.int 8 %int128_427 = torch.constant.int 128 %633 = torch.prim.ListConstruct %416, %int32_423, %int2_424, %int32_425, %int8_426, %int128_427 : 
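// Editor's note: the rotated K states are reshaped to the cache's flattened
// [pages*64, 32, 8, 128] layout and scattered into the KV cache %458 via
// index_put. Each [?,2097152] cache row factors as 32 x 2 x 32 x 8 x 128
// (= 2,097,152 elements), plausibly 32 blocks, a K/V pair, 32 tokens per page,
// 8 KV heads, and a 128-wide head dim.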
(!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %634 = torch.aten.view %632, %633 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %634, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_428 = torch.constant.int 2097152 %635 = torch.prim.ListConstruct %416, %int2097152_428 : (!torch.int, !torch.int) -> !torch.list<int> %636 = torch.aten.view %634, %635 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %636, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_429 = torch.constant.int 32 %int2_430 = torch.constant.int 2 %int32_431 = torch.constant.int 32 %int8_432 = torch.constant.int 8 %int128_433 = torch.constant.int 128 %637 = torch.prim.ListConstruct %416, %int32_429, %int2_430, %int32_431, %int8_432, %int128_433 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %638 = torch.aten.view %636, %637 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %638, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_434 = torch.constant.int 32 %int8_435 = torch.constant.int 8 %int128_436 = torch.constant.int 128 %639 = torch.prim.ListConstruct %628, %int32_434, %int8_435, %int128_436 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %640 = torch.aten.view %638, %639 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %640, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_437 = torch.constant.int 4 %int32_438 = torch.constant.int 32 %int8_439 = torch.constant.int 8 %int128_440 = torch.constant.int 128 %641 = torch.prim.ListConstruct %int4_437, %425, %int32_438, %int8_439, %int128_440 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %642 = torch.aten.view %556, %641 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %642, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_441 = torch.constant.int 4 %643 = torch.aten.mul.int %int4_441, %425 : !torch.int, !torch.int -> !torch.int %int32_442 = torch.constant.int 32 %int8_443 = torch.constant.int 8 %int128_444 = torch.constant.int 128 %644 = torch.prim.ListConstruct %643, %int32_442, %int8_443, %int128_444 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %645 = torch.aten.view %642, %644 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %645, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_445 = torch.constant.int 1 %int1_446 = torch.constant.int 1 %646 = torch.aten.add.Scalar %616, %int1_445, %int1_446 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %646, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_447 = torch.constant.int 4 %647 = torch.aten.mul.int %int4_447, %425 : !torch.int, !torch.int -> !torch.int %648 = torch.prim.ListConstruct %647 : (!torch.int) -> !torch.list<int> %649 = torch.aten.view %646, %648 : !torch.vtensor<[4,?],si64>, 
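// Editor's note: the same view round-trip now repeats for the V states (%556):
// the cache is collapsed back to its flat [?,2097152] form and re-expanded, and
// %646 offsets the K slot indices by 1 to address the matching V sub-slot.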
!torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %649, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %650 = torch.prim.ListConstruct %649 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_448 = torch.constant.bool false %651 = torch.aten.index_put %640, %650, %645, %false_448 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %651, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_449 = torch.constant.int 32 %int2_450 = torch.constant.int 2 %int32_451 = torch.constant.int 32 %int8_452 = torch.constant.int 8 %int128_453 = torch.constant.int 128 %652 = torch.prim.ListConstruct %416, %int32_449, %int2_450, %int32_451, %int8_452, %int128_453 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %653 = torch.aten.view %651, %652 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %653, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_454 = torch.constant.int 2097152 %654 = torch.prim.ListConstruct %416, %int2097152_454 : (!torch.int, !torch.int) -> !torch.list<int> %655 = torch.aten.view %653, %654 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %655, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_455 = torch.constant.int -2 %656 = torch.aten.unsqueeze %614, %int-2_455 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %656, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_456 = torch.constant.int 4 %int8_457 = torch.constant.int 8 %int4_458 = torch.constant.int 4 %int128_459 = torch.constant.int 128 %657 = torch.prim.ListConstruct %int4_456, %600, %int8_457, %int4_458, %int128_459 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_460 = torch.constant.bool false %658 = torch.aten.expand %656, %657, %false_460 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %658, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_461 = torch.constant.int 0 %659 = torch.aten.clone %658, %int0_461 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %659, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_462 = torch.constant.int 4 %int32_463 = torch.constant.int 32 %int128_464 = torch.constant.int 128 %660 = torch.prim.ListConstruct %int4_462, %600, %int32_463, %int128_464 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %661 = torch.aten._unsafe_view %659, %660 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %661, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_465 = torch.constant.int -2 %662 = torch.aten.unsqueeze %556, %int-2_465 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %662, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : 
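// Editor's note: %651 scatters V into the cache. Starting at %656, the 8 KV
// heads are broadcast for grouped-query attention: unsqueeze to [4,?,8,1,128],
// expand to [4,?,8,4,128] (group size 4), clone, then _unsafe_view to
// [4,?,32,128] so K matches the 32 query heads.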
!torch.vtensor<[4,?,8,1,128],f16> %int1_466 = torch.constant.int 1 %663 = torch.aten.size.int %550, %int1_466 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_467 = torch.constant.int 4 %int8_468 = torch.constant.int 8 %int4_469 = torch.constant.int 4 %int128_470 = torch.constant.int 128 %664 = torch.prim.ListConstruct %int4_467, %663, %int8_468, %int4_469, %int128_470 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_471 = torch.constant.bool false %665 = torch.aten.expand %662, %664, %false_471 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %665, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_472 = torch.constant.int 0 %666 = torch.aten.clone %665, %int0_472 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %666, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_473 = torch.constant.int 4 %int32_474 = torch.constant.int 32 %int128_475 = torch.constant.int 128 %667 = torch.prim.ListConstruct %int4_473, %663, %int32_474, %int128_475 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %668 = torch.aten._unsafe_view %666, %667 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %668, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_476 = torch.constant.int 1 %int2_477 = torch.constant.int 2 %669 = torch.aten.transpose.int %585, %int1_476, %int2_477 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %669, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_478 = torch.constant.int 1 %int2_479 = torch.constant.int 2 %670 = torch.aten.transpose.int %661, %int1_478, %int2_479 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %670, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_480 = torch.constant.int 1 %int2_481 = torch.constant.int 2 %671 = torch.aten.transpose.int %668, %int1_480, %int2_481 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %671, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_482 = torch.constant.float 0.000000e+00 %false_483 = torch.constant.bool false %none_484 = torch.constant.none %672:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%669, %670, %671, %float0.000000e00_482, %false_483, %320, %none_484) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %672#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_485 = torch.constant.int 1 %int2_486 = torch.constant.int 2 %673 = torch.aten.transpose.int %672#0, %int1_485, %int2_486 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %673, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : 
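// Editor's note: V gets the identical 8 -> 32 head expansion, Q/K/V are
// transposed to [4,32,seq,128], and attention runs through
// _scaled_dot_product_flash_attention_for_cpu with dropout 0.0, is_causal
// false, and the explicit [4,1,?,?] mask %320; the output is transposed back
// to [4,seq,32,128].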
!torch.vtensor<[4,?,32,128],f16> %int4_487 = torch.constant.int 4 %int4096_488 = torch.constant.int 4096 %674 = torch.prim.ListConstruct %int4_487, %571, %int4096_488 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %675 = torch.aten.view %673, %674 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %675, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_489 = torch.constant.int -2 %int-1_490 = torch.constant.int -1 %676 = torch.aten.transpose.int %14, %int-2_489, %int-1_490 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_491 = torch.constant.int 4 %677 = torch.aten.mul.int %int4_491, %571 : !torch.int, !torch.int -> !torch.int %int4096_492 = torch.constant.int 4096 %678 = torch.prim.ListConstruct %677, %int4096_492 : (!torch.int, !torch.int) -> !torch.list<int> %679 = torch.aten.view %675, %678 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %679, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %680 = torch.aten.mm %679, %676 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %680, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_493 = torch.constant.int 4 %int4096_494 = torch.constant.int 4096 %681 = torch.prim.ListConstruct %int4_493, %571, %int4096_494 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %682 = torch.aten.view %680, %681 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %682, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_495 = torch.constant.int 1 %683 = torch.aten.add.Tensor %520, %682, %int1_495 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %683, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_496 = torch.constant.int 6 %684 = torch.prims.convert_element_type %683, %int6_496 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %684, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_497 = torch.constant.int 2 %685 = torch.aten.pow.Tensor_Scalar %684, %int2_497 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %685, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_498 = torch.constant.int -1 %686 = torch.prim.ListConstruct %int-1_498 : (!torch.int) -> !torch.list<int> %true_499 = torch.constant.bool true %none_500 = torch.constant.none %687 = torch.aten.mean.dim %685, %686, %true_499, %none_500 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %687, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_501 = torch.constant.float 9.9999997473787516E-6 %int1_502 = torch.constant.int 1 %688 = torch.aten.add.Scalar %687, %float9.999990e-06_501, %int1_502 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %688, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %689 = torch.aten.rsqrt %688 : 
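// Editor's note: the attention output is flattened to [?,4096] and multiplied
// by the transposed [4096,4096] weight %14 (by position, presumably this
// block's attn_output.weight), then added to the residual %520. The ops that
// follow are RMSNorm: mean of x^2 over the last dim, add eps (the f32 rounding
// of 1.0e-5), rsqrt, scale.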
!torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %689, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %690 = torch.aten.mul.Tensor %684, %689 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %690, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %691 = torch.aten.mul.Tensor %15, %690 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %691, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_503 = torch.constant.int 5 %692 = torch.prims.convert_element_type %691, %int5_503 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %692, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_504 = torch.constant.int -2 %int-1_505 = torch.constant.int -1 %693 = torch.aten.transpose.int %16, %int-2_504, %int-1_505 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_506 = torch.constant.int 4 %694 = torch.aten.mul.int %int4_506, %294 : !torch.int, !torch.int -> !torch.int %int4096_507 = torch.constant.int 4096 %695 = torch.prim.ListConstruct %694, %int4096_507 : (!torch.int, !torch.int) -> !torch.list<int> %696 = torch.aten.view %692, %695 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %696, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %697 = torch.aten.mm %696, %693 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %697, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_508 = torch.constant.int 4 %int14336_509 = torch.constant.int 14336 %698 = torch.prim.ListConstruct %int4_508, %294, %int14336_509 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %699 = torch.aten.view %697, %698 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %699, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %700 = torch.aten.silu %699 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %700, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_510 = torch.constant.int -2 %int-1_511 = torch.constant.int -1 %701 = torch.aten.transpose.int %17, %int-2_510, %int-1_511 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_512 = torch.constant.int 4 %702 = torch.aten.mul.int %int4_512, %294 : !torch.int, !torch.int -> !torch.int %int4096_513 = torch.constant.int 4096 %703 = torch.prim.ListConstruct %702, %int4096_513 : (!torch.int, !torch.int) -> !torch.list<int> %704 = torch.aten.view %692, %703 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %704, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %705 = torch.aten.mm %704, %701 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %705, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_514 = torch.constant.int 4 %int14336_515 = torch.constant.int 14336 
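// Editor's note: the normalized activations (scaled by the [4096] weight %15)
// enter the SwiGLU FFN: gate projection %16 and up projection %17 are both
// [14336,4096] weights applied as [?,4096] x [4096,14336] matmuls, with SiLU
// applied to the gate branch (%700).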
%706 = torch.prim.ListConstruct %int4_514, %294, %int14336_515 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %707 = torch.aten.view %705, %706 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %707, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %708 = torch.aten.mul.Tensor %700, %707 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %708, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_516 = torch.constant.int -2 %int-1_517 = torch.constant.int -1 %709 = torch.aten.transpose.int %18, %int-2_516, %int-1_517 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_518 = torch.constant.int 1 %710 = torch.aten.size.int %699, %int1_518 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_519 = torch.constant.int 4 %711 = torch.aten.mul.int %int4_519, %710 : !torch.int, !torch.int -> !torch.int %int14336_520 = torch.constant.int 14336 %712 = torch.prim.ListConstruct %711, %int14336_520 : (!torch.int, !torch.int) -> !torch.list<int> %713 = torch.aten.view %708, %712 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %713, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %714 = torch.aten.mm %713, %709 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %714, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_521 = torch.constant.int 4 %int4096_522 = torch.constant.int 4096 %715 = torch.prim.ListConstruct %int4_521, %710, %int4096_522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %716 = torch.aten.view %714, %715 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %716, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_523 = torch.constant.int 1 %717 = torch.aten.add.Tensor %683, %716, %int1_523 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %717, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_524 = torch.constant.int 6 %718 = torch.prims.convert_element_type %717, %int6_524 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %718, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_525 = torch.constant.int 2 %719 = torch.aten.pow.Tensor_Scalar %718, %int2_525 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %719, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_526 = torch.constant.int -1 %720 = torch.prim.ListConstruct %int-1_526 : (!torch.int) -> !torch.list<int> %true_527 = torch.constant.bool true %none_528 = torch.constant.none %721 = torch.aten.mean.dim %719, %720, %true_527, %none_528 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %721, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_529 = torch.constant.float 9.9999997473787516E-6 %int1_530 = torch.constant.int 1 
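// Editor's note: %708 = silu(gate) * up completes SwiGLU; the down projection
// %18 ([4096,14336], transposed) maps back to width 4096 and %717 adds the
// residual. The block then re-enters RMSNorm (same eps) to normalize the input
// of the next transformer block.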
%722 = torch.aten.add.Scalar %721, %float9.999990e-06_529, %int1_530 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %722, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %723 = torch.aten.rsqrt %722 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %723, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %724 = torch.aten.mul.Tensor %718, %723 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %724, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %725 = torch.aten.mul.Tensor %19, %724 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %725, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_531 = torch.constant.int 5 %726 = torch.prims.convert_element_type %725, %int5_531 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %726, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_532 = torch.constant.int -2 %int-1_533 = torch.constant.int -1 %727 = torch.aten.transpose.int %20, %int-2_532, %int-1_533 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_534 = torch.constant.int 4 %728 = torch.aten.mul.int %int4_534, %294 : !torch.int, !torch.int -> !torch.int %int4096_535 = torch.constant.int 4096 %729 = torch.prim.ListConstruct %728, %int4096_535 : (!torch.int, !torch.int) -> !torch.list<int> %730 = torch.aten.view %726, %729 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %730, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %731 = torch.aten.mm %730, %727 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %731, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_536 = torch.constant.int 4 %int4096_537 = torch.constant.int 4096 %732 = torch.prim.ListConstruct %int4_536, %294, %int4096_537 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %733 = torch.aten.view %731, %732 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %733, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_538 = torch.constant.int -2 %int-1_539 = torch.constant.int -1 %734 = torch.aten.transpose.int %21, %int-2_538, %int-1_539 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_540 = torch.constant.int 4 %735 = torch.aten.mul.int %int4_540, %294 : !torch.int, !torch.int -> !torch.int %int4096_541 = torch.constant.int 4096 %736 = torch.prim.ListConstruct %735, %int4096_541 : (!torch.int, !torch.int) -> !torch.list<int> %737 = torch.aten.view %726, %736 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %737, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %738 = torch.aten.mm %737, %734 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %738, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_542 
= torch.constant.int 4 %int1024_543 = torch.constant.int 1024 %739 = torch.prim.ListConstruct %int4_542, %294, %int1024_543 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %740 = torch.aten.view %738, %739 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %740, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_544 = torch.constant.int -2 %int-1_545 = torch.constant.int -1 %741 = torch.aten.transpose.int %22, %int-2_544, %int-1_545 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_546 = torch.constant.int 4 %742 = torch.aten.mul.int %int4_546, %294 : !torch.int, !torch.int -> !torch.int %int4096_547 = torch.constant.int 4096 %743 = torch.prim.ListConstruct %742, %int4096_547 : (!torch.int, !torch.int) -> !torch.list<int> %744 = torch.aten.view %726, %743 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %744, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %745 = torch.aten.mm %744, %741 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %745, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_548 = torch.constant.int 4 %int1024_549 = torch.constant.int 1024 %746 = torch.prim.ListConstruct %int4_548, %294, %int1024_549 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %747 = torch.aten.view %745, %746 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %747, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_550 = torch.constant.int 4 %int32_551 = torch.constant.int 32 %int128_552 = torch.constant.int 128 %748 = torch.prim.ListConstruct %int4_550, %294, %int32_551, %int128_552 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %749 = torch.aten.view %733, %748 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %749, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_553 = torch.constant.int 4 %int8_554 = torch.constant.int 8 %int128_555 = torch.constant.int 128 %750 = torch.prim.ListConstruct %int4_553, %294, %int8_554, %int128_555 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %751 = torch.aten.view %740, %750 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %751, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_556 = torch.constant.int 4 %int8_557 = torch.constant.int 8 %int128_558 = torch.constant.int 128 %752 = torch.prim.ListConstruct %int4_556, %294, %int8_557, %int128_558 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %753 = torch.aten.view %747, %752 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %753, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_559 = torch.constant.int 131072 %none_560 = torch.constant.none %none_561 = torch.constant.none %cpu_562 = torch.constant.device "cpu" %false_563 = torch.constant.bool false %754 = torch.aten.arange %int131072_559, %none_560, %none_561, %cpu_562, %false_563 : !torch.int, !torch.none, !torch.none, 
!torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_564 = torch.constant.int 0 %int128_565 = torch.constant.int 128 %int2_566 = torch.constant.int 2 %none_567 = torch.constant.none %none_568 = torch.constant.none %cpu_569 = torch.constant.device "cpu" %false_570 = torch.constant.bool false %755 = torch.aten.arange.start_step %int0_564, %int128_565, %int2_566, %none_567, %none_568, %cpu_569, %false_570 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_571 = torch.constant.int 0 %int0_572 = torch.constant.int 0 %int64_573 = torch.constant.int 64 %int1_574 = torch.constant.int 1 %756 = torch.aten.slice.Tensor %755, %int0_571, %int0_572, %int64_573, %int1_574 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_575 = torch.constant.int 6 %757 = torch.prims.convert_element_type %756, %int6_575 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_576 = torch.constant.int 128 %758 = torch.aten.div.Scalar %757, %int128_576 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_577 = torch.constant.float 5.000000e+05 %759 = torch.aten.pow.Scalar %float5.000000e05_577, %758 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %760 = torch.aten.reciprocal %759 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_578 = torch.constant.float 1.000000e+00 %761 = torch.aten.mul.Scalar %760, %float1.000000e00_578 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_579 = torch.constant.int 131072 %int1_580 = torch.constant.int 1 %762 = torch.prim.ListConstruct %int131072_579, %int1_580 : (!torch.int, !torch.int) -> !torch.list<int> %763 = torch.aten.view %754, %762 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %764 = torch.aten.mul.Tensor %763, %761 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %765 = torch.aten.cos %764 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %766 = torch.aten.sin %764 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %767 = torch.aten.complex %765, %766 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_581 = torch.constant.int 1 %768 = torch.aten.size.int %733, %int1_581 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_582 = torch.constant.int 0 %769 = torch.aten.add.int %int0_582, %768 : !torch.int, !torch.int -> !torch.int %int0_583 = torch.constant.int 0 %int0_584 = torch.constant.int 0 %int1_585 = torch.constant.int 1 %770 = torch.aten.slice.Tensor %767, %int0_583, %int0_584, %769, %int1_585 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %770, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_586 = torch.constant.int 1 %int0_587 = torch.constant.int 0 %int9223372036854775807_588 = torch.constant.int 9223372036854775807 %int1_589 = torch.constant.int 1 %771 = torch.aten.slice.Tensor %770, %int1_586, %int0_587, %int9223372036854775807_588, %int1_589 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %771, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : 
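// Editor's note: the next block projects Q (%733, via the [4096,4096] weight
// %20) and K/V (%740, %747, via [1024,4096] weights), i.e. 32 query heads
// against 8 KV heads (4:1 GQA). The RoPE table is then rebuilt from scratch
// with the same constants; the compiler appears to re-materialize it per use
// rather than hoist a shared copy.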
!torch.vtensor<[?,64],complex<f32>> %int0_590 = torch.constant.int 0 %772 = torch.aten.unsqueeze %771, %int0_590 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %772, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_591 = torch.constant.int 2 %773 = torch.aten.unsqueeze %772, %int2_591 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %773, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_592 = torch.constant.int 3 %int0_593 = torch.constant.int 0 %int9223372036854775807_594 = torch.constant.int 9223372036854775807 %int1_595 = torch.constant.int 1 %774 = torch.aten.slice.Tensor %773, %int3_592, %int0_593, %int9223372036854775807_594, %int1_595 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %774, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %775 = torch_c.to_builtin_tensor %749 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_596 = arith.constant 1 : index %dim_597 = tensor.dim %775, %c1_596 : tensor<4x?x32x128xf16> %776 = flow.tensor.bitcast %775 : tensor<4x?x32x128xf16>{%dim_597} -> tensor<4x?x32x64xcomplex<f16>>{%dim_597} %777 = torch_c.from_builtin_tensor %776 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %777, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %778 = torch.aten.mul.Tensor %777, %774 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %778, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %779 = torch_c.to_builtin_tensor %778 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_598 = arith.constant 1 : index %dim_599 = tensor.dim %779, %c1_598 : tensor<4x?x32x64xcomplex<f32>> %780 = flow.tensor.bitcast %779 : tensor<4x?x32x64xcomplex<f32>>{%dim_599} -> tensor<4x?x32x128xf32>{%dim_599} %781 = torch_c.from_builtin_tensor %780 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %781, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_600 = torch.constant.int 5 %782 = torch.prims.convert_element_type %781, %int5_600 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %782, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_601 = torch.constant.int 131072 %none_602 = torch.constant.none %none_603 = torch.constant.none %cpu_604 = torch.constant.device "cpu" %false_605 = torch.constant.bool false %783 = torch.aten.arange %int131072_601, %none_602, %none_603, %cpu_604, %false_605 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_606 = torch.constant.int 0 %int128_607 = torch.constant.int 128 %int2_608 = torch.constant.int 2 %none_609 = torch.constant.none %none_610 = torch.constant.none %cpu_611 = torch.constant.device "cpu" %false_612 = torch.constant.bool false %784 = torch.aten.arange.start_step %int0_606, %int128_607, %int2_608, %none_609, %none_610, %cpu_611, %false_612 
: !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_613 = torch.constant.int 0 %int0_614 = torch.constant.int 0 %int64_615 = torch.constant.int 64 %int1_616 = torch.constant.int 1 %785 = torch.aten.slice.Tensor %784, %int0_613, %int0_614, %int64_615, %int1_616 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_617 = torch.constant.int 6 %786 = torch.prims.convert_element_type %785, %int6_617 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_618 = torch.constant.int 128 %787 = torch.aten.div.Scalar %786, %int128_618 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_619 = torch.constant.float 5.000000e+05 %788 = torch.aten.pow.Scalar %float5.000000e05_619, %787 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %789 = torch.aten.reciprocal %788 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_620 = torch.constant.float 1.000000e+00 %790 = torch.aten.mul.Scalar %789, %float1.000000e00_620 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_621 = torch.constant.int 131072 %int1_622 = torch.constant.int 1 %791 = torch.prim.ListConstruct %int131072_621, %int1_622 : (!torch.int, !torch.int) -> !torch.list<int> %792 = torch.aten.view %783, %791 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %793 = torch.aten.mul.Tensor %792, %790 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %794 = torch.aten.cos %793 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %795 = torch.aten.sin %793 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %796 = torch.aten.complex %794, %795 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_623 = torch.constant.int 1 %797 = torch.aten.size.int %740, %int1_623 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_624 = torch.constant.int 0 %798 = torch.aten.add.int %int0_624, %797 : !torch.int, !torch.int -> !torch.int %int0_625 = torch.constant.int 0 %int0_626 = torch.constant.int 0 %int1_627 = torch.constant.int 1 %799 = torch.aten.slice.Tensor %796, %int0_625, %int0_626, %798, %int1_627 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %799, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_628 = torch.constant.int 1 %int0_629 = torch.constant.int 0 %int9223372036854775807_630 = torch.constant.int 9223372036854775807 %int1_631 = torch.constant.int 1 %800 = torch.aten.slice.Tensor %799, %int1_628, %int0_629, %int9223372036854775807_630, %int1_631 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %800, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_632 = torch.constant.int 0 %801 = torch.aten.unsqueeze %800, %int0_632 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %801, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_633 = torch.constant.int 2 %802 = torch.aten.unsqueeze %801, %int2_633 : 
!torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %802, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_634 = torch.constant.int 3 %int0_635 = torch.constant.int 0 %int9223372036854775807_636 = torch.constant.int 9223372036854775807 %int1_637 = torch.constant.int 1 %803 = torch.aten.slice.Tensor %802, %int3_634, %int0_635, %int9223372036854775807_636, %int1_637 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %803, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %804 = torch_c.to_builtin_tensor %751 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_638 = arith.constant 1 : index %dim_639 = tensor.dim %804, %c1_638 : tensor<4x?x8x128xf16> %805 = flow.tensor.bitcast %804 : tensor<4x?x8x128xf16>{%dim_639} -> tensor<4x?x8x64xcomplex<f16>>{%dim_639} %806 = torch_c.from_builtin_tensor %805 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %806, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %807 = torch.aten.mul.Tensor %806, %803 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %807, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %808 = torch_c.to_builtin_tensor %807 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_640 = arith.constant 1 : index %dim_641 = tensor.dim %808, %c1_640 : tensor<4x?x8x64xcomplex<f32>> %809 = flow.tensor.bitcast %808 : tensor<4x?x8x64xcomplex<f32>>{%dim_641} -> tensor<4x?x8x128xf32>{%dim_641} %810 = torch_c.from_builtin_tensor %809 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %810, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_642 = torch.constant.int 5 %811 = torch.prims.convert_element_type %810, %int5_642 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %811, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_643 = torch.constant.int 64 %812 = torch.aten.mul.Scalar %arg2, %int64_643 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %812, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_644 = torch.constant.int 4 %int1_645 = torch.constant.int 1 %813 = torch.aten.add.Scalar %812, %int4_644, %int1_645 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %813, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_646 = torch.constant.int 4 %int32_647 = torch.constant.int 32 %int8_648 = torch.constant.int 8 %int128_649 = torch.constant.int 128 %814 = torch.prim.ListConstruct %int4_646, %425, %int32_647, %int8_648, %int128_649 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %815 = torch.aten.view %811, %814 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %815, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_650 = torch.constant.int 4 %816 = 
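// Editor's note: RoPE is applied to Q (%782, 32 heads) and, after a third
// identical table build, to K (%811, 8 heads) via the same complex-bitcast
// trick. The cache slot here is page id * 64 + 4, i.e. the K sub-slot two
// positions on, supporting the slot = 2*block (+1 for V) reading above.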
torch.aten.mul.int %int4_650, %425 : !torch.int, !torch.int -> !torch.int %int32_651 = torch.constant.int 32 %int8_652 = torch.constant.int 8 %int128_653 = torch.constant.int 128 %817 = torch.prim.ListConstruct %816, %int32_651, %int8_652, %int128_653 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %818 = torch.aten.view %815, %817 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %818, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_654 = torch.constant.int 4 %819 = torch.aten.mul.int %int4_654, %425 : !torch.int, !torch.int -> !torch.int %820 = torch.prim.ListConstruct %819 : (!torch.int) -> !torch.list<int> %821 = torch.aten.view %813, %820 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %821, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_655 = torch.constant.int 32 %int2_656 = torch.constant.int 2 %int32_657 = torch.constant.int 32 %int8_658 = torch.constant.int 8 %int128_659 = torch.constant.int 128 %822 = torch.prim.ListConstruct %416, %int32_655, %int2_656, %int32_657, %int8_658, %int128_659 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %823 = torch.aten.view %655, %822 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %823, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_660 = torch.constant.int 32 %824 = torch.aten.mul.int %416, %int32_660 : !torch.int, !torch.int -> !torch.int %int2_661 = torch.constant.int 2 %825 = torch.aten.mul.int %824, %int2_661 : !torch.int, !torch.int -> !torch.int %int32_662 = torch.constant.int 32 %int8_663 = torch.constant.int 8 %int128_664 = torch.constant.int 128 %826 = torch.prim.ListConstruct %825, %int32_662, %int8_663, %int128_664 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %827 = torch.aten.view %823, %826 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %827, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %828 = torch.prim.ListConstruct %821 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_665 = torch.constant.bool false %829 = torch.aten.index_put %827, %828, %818, %false_665 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %829, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_666 = torch.constant.int 32 %int2_667 = torch.constant.int 2 %int32_668 = torch.constant.int 32 %int8_669 = torch.constant.int 8 %int128_670 = torch.constant.int 128 %830 = torch.prim.ListConstruct %416, %int32_666, %int2_667, %int32_668, %int8_669, %int128_670 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %831 = torch.aten.view %829, %830 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %831, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_671 = torch.constant.int 2097152 %832 = torch.prim.ListConstruct %416, %int2097152_671 : (!torch.int, !torch.int) -> !torch.list<int> %833 = torch.aten.view 
%831, %832 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %833, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_672 = torch.constant.int 32 %int2_673 = torch.constant.int 2 %int32_674 = torch.constant.int 32 %int8_675 = torch.constant.int 8 %int128_676 = torch.constant.int 128 %834 = torch.prim.ListConstruct %416, %int32_672, %int2_673, %int32_674, %int8_675, %int128_676 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %835 = torch.aten.view %833, %834 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %835, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_677 = torch.constant.int 32 %int8_678 = torch.constant.int 8 %int128_679 = torch.constant.int 128 %836 = torch.prim.ListConstruct %825, %int32_677, %int8_678, %int128_679 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %837 = torch.aten.view %835, %836 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %837, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_680 = torch.constant.int 4 %int32_681 = torch.constant.int 32 %int8_682 = torch.constant.int 8 %int128_683 = torch.constant.int 128 %838 = torch.prim.ListConstruct %int4_680, %425, %int32_681, %int8_682, %int128_683 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %839 = torch.aten.view %753, %838 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %839, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_684 = torch.constant.int 4 %840 = torch.aten.mul.int %int4_684, %425 : !torch.int, !torch.int -> !torch.int %int32_685 = torch.constant.int 32 %int8_686 = torch.constant.int 8 %int128_687 = torch.constant.int 128 %841 = torch.prim.ListConstruct %840, %int32_685, %int8_686, %int128_687 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %842 = torch.aten.view %839, %841 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %842, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_688 = torch.constant.int 1 %int1_689 = torch.constant.int 1 %843 = torch.aten.add.Scalar %813, %int1_688, %int1_689 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %843, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_690 = torch.constant.int 4 %844 = torch.aten.mul.int %int4_690, %425 : !torch.int, !torch.int -> !torch.int %845 = torch.prim.ListConstruct %844 : (!torch.int) -> !torch.list<int> %846 = torch.aten.view %843, %845 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %846, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %847 = torch.prim.ListConstruct %846 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_691 = torch.constant.bool false %848 = torch.aten.index_put %837, %847, %842, %false_691 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape 
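// Editor's note: as in the previous block, K is scattered into the paged cache
// (%829), the cache is round-tripped through its flat [?,2097152] view, and V
// is written at the +1 sub-slot (%848).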
%848, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_692 = torch.constant.int 32 %int2_693 = torch.constant.int 2 %int32_694 = torch.constant.int 32 %int8_695 = torch.constant.int 8 %int128_696 = torch.constant.int 128 %849 = torch.prim.ListConstruct %416, %int32_692, %int2_693, %int32_694, %int8_695, %int128_696 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %850 = torch.aten.view %848, %849 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %850, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_697 = torch.constant.int 2097152 %851 = torch.prim.ListConstruct %416, %int2097152_697 : (!torch.int, !torch.int) -> !torch.list<int> %852 = torch.aten.view %850, %851 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %852, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_698 = torch.constant.int -2 %853 = torch.aten.unsqueeze %811, %int-2_698 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %853, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_699 = torch.constant.int 4 %int8_700 = torch.constant.int 8 %int4_701 = torch.constant.int 4 %int128_702 = torch.constant.int 128 %854 = torch.prim.ListConstruct %int4_699, %797, %int8_700, %int4_701, %int128_702 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_703 = torch.constant.bool false %855 = torch.aten.expand %853, %854, %false_703 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %855, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_704 = torch.constant.int 0 %856 = torch.aten.clone %855, %int0_704 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %856, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_705 = torch.constant.int 4 %int32_706 = torch.constant.int 32 %int128_707 = torch.constant.int 128 %857 = torch.prim.ListConstruct %int4_705, %797, %int32_706, %int128_707 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %858 = torch.aten._unsafe_view %856, %857 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %858, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_708 = torch.constant.int -2 %859 = torch.aten.unsqueeze %753, %int-2_708 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %859, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_709 = torch.constant.int 1 %860 = torch.aten.size.int %747, %int1_709 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_710 = torch.constant.int 4 %int8_711 = torch.constant.int 8 %int4_712 = torch.constant.int 4 %int128_713 = torch.constant.int 128 %861 = torch.prim.ListConstruct %int4_710, %860, %int8_711, %int4_712, %int128_713 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_714 = torch.constant.bool false %862 = 
torch.aten.expand %859, %861, %false_714 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %862, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_715 = torch.constant.int 0 %863 = torch.aten.clone %862, %int0_715 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %863, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_716 = torch.constant.int 4 %int32_717 = torch.constant.int 32 %int128_718 = torch.constant.int 128 %864 = torch.prim.ListConstruct %int4_716, %860, %int32_717, %int128_718 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %865 = torch.aten._unsafe_view %863, %864 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %865, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_719 = torch.constant.int 1 %int2_720 = torch.constant.int 2 %866 = torch.aten.transpose.int %782, %int1_719, %int2_720 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %866, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_721 = torch.constant.int 1 %int2_722 = torch.constant.int 2 %867 = torch.aten.transpose.int %858, %int1_721, %int2_722 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %867, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_723 = torch.constant.int 1 %int2_724 = torch.constant.int 2 %868 = torch.aten.transpose.int %865, %int1_723, %int2_724 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %868, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_725 = torch.constant.float 0.000000e+00 %false_726 = torch.constant.bool false %none_727 = torch.constant.none %869:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%866, %867, %868, %float0.000000e00_725, %false_726, %320, %none_727) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %869#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_728 = torch.constant.int 1 %int2_729 = torch.constant.int 2 %870 = torch.aten.transpose.int %869#0, %int1_728, %int2_729 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %870, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_730 = torch.constant.int 4 %int4096_731 = torch.constant.int 4096 %871 = torch.prim.ListConstruct %int4_730, %768, %int4096_731 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %872 = torch.aten.view %870, %871 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %872, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_732 = torch.constant.int -2 %int-1_733 = torch.constant.int -1 %873 = 
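// Editor's note: K and V are again broadcast from 8 to 32 heads (group size 4)
// and the second attention call runs through the same flash-attention op with
// mask %320; %870 transposes the result back to [4,seq,32,128] for the output
// projection.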
torch.aten.transpose.int %23, %int-2_732, %int-1_733 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_734 = torch.constant.int 4 %874 = torch.aten.mul.int %int4_734, %768 : !torch.int, !torch.int -> !torch.int %int4096_735 = torch.constant.int 4096 %875 = torch.prim.ListConstruct %874, %int4096_735 : (!torch.int, !torch.int) -> !torch.list<int> %876 = torch.aten.view %872, %875 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %876, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %877 = torch.aten.mm %876, %873 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %877, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_736 = torch.constant.int 4 %int4096_737 = torch.constant.int 4096 %878 = torch.prim.ListConstruct %int4_736, %768, %int4096_737 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %879 = torch.aten.view %877, %878 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %879, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_738 = torch.constant.int 1 %880 = torch.aten.add.Tensor %717, %879, %int1_738 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %880, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_739 = torch.constant.int 6 %881 = torch.prims.convert_element_type %880, %int6_739 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %881, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_740 = torch.constant.int 2 %882 = torch.aten.pow.Tensor_Scalar %881, %int2_740 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %882, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_741 = torch.constant.int -1 %883 = torch.prim.ListConstruct %int-1_741 : (!torch.int) -> !torch.list<int> %true_742 = torch.constant.bool true %none_743 = torch.constant.none %884 = torch.aten.mean.dim %882, %883, %true_742, %none_743 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %884, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_744 = torch.constant.float 9.9999997473787516E-6 %int1_745 = torch.constant.int 1 %885 = torch.aten.add.Scalar %884, %float9.999990e-06_744, %int1_745 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %885, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %886 = torch.aten.rsqrt %885 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %886, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %887 = torch.aten.mul.Tensor %881, %886 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %887, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %888 = torch.aten.mul.Tensor %24, %887 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> 
!torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %888, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_746 = torch.constant.int 5 %889 = torch.prims.convert_element_type %888, %int5_746 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %889, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_747 = torch.constant.int -2 %int-1_748 = torch.constant.int -1 %890 = torch.aten.transpose.int %25, %int-2_747, %int-1_748 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_749 = torch.constant.int 4 %891 = torch.aten.mul.int %int4_749, %294 : !torch.int, !torch.int -> !torch.int %int4096_750 = torch.constant.int 4096 %892 = torch.prim.ListConstruct %891, %int4096_750 : (!torch.int, !torch.int) -> !torch.list<int> %893 = torch.aten.view %889, %892 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %893, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %894 = torch.aten.mm %893, %890 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %894, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_751 = torch.constant.int 4 %int14336_752 = torch.constant.int 14336 %895 = torch.prim.ListConstruct %int4_751, %294, %int14336_752 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %896 = torch.aten.view %894, %895 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %896, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %897 = torch.aten.silu %896 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %897, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_753 = torch.constant.int -2 %int-1_754 = torch.constant.int -1 %898 = torch.aten.transpose.int %26, %int-2_753, %int-1_754 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_755 = torch.constant.int 4 %899 = torch.aten.mul.int %int4_755, %294 : !torch.int, !torch.int -> !torch.int %int4096_756 = torch.constant.int 4096 %900 = torch.prim.ListConstruct %899, %int4096_756 : (!torch.int, !torch.int) -> !torch.list<int> %901 = torch.aten.view %889, %900 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %901, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %902 = torch.aten.mm %901, %898 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %902, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_757 = torch.constant.int 4 %int14336_758 = torch.constant.int 14336 %903 = torch.prim.ListConstruct %int4_757, %294, %int14336_758 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %904 = torch.aten.view %902, %903 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %904, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %905 = torch.aten.mul.Tensor %897, %904 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> 
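// The ops above are the SwiGLU feed-forward block of a LLaMA-style layer:
// %881..%889 is the pre-FFN RMSNorm (computed in f32, cast back to f16),
// %894/%896 the gate projection (4096 -> 14336), %897 the SiLU activation,
// %902/%904 the up projection, and %905 their elementwise product. The down
// projection (%27, 14336 -> 4096) and the residual add follow below.
// Rough sketch of the same computation (names illustrative, not from the IR):
//   h = ffn_down(silu(ffn_gate(x)) * ffn_up(x)) + x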
torch.bind_symbolic_shape %905, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_759 = torch.constant.int -2 %int-1_760 = torch.constant.int -1 %906 = torch.aten.transpose.int %27, %int-2_759, %int-1_760 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_761 = torch.constant.int 1 %907 = torch.aten.size.int %896, %int1_761 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_762 = torch.constant.int 4 %908 = torch.aten.mul.int %int4_762, %907 : !torch.int, !torch.int -> !torch.int %int14336_763 = torch.constant.int 14336 %909 = torch.prim.ListConstruct %908, %int14336_763 : (!torch.int, !torch.int) -> !torch.list<int> %910 = torch.aten.view %905, %909 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %910, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %911 = torch.aten.mm %910, %906 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %911, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_764 = torch.constant.int 4 %int4096_765 = torch.constant.int 4096 %912 = torch.prim.ListConstruct %int4_764, %907, %int4096_765 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %913 = torch.aten.view %911, %912 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %913, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_766 = torch.constant.int 1 %914 = torch.aten.add.Tensor %880, %913, %int1_766 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %914, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_767 = torch.constant.int 6 %915 = torch.prims.convert_element_type %914, %int6_767 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %915, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_768 = torch.constant.int 2 %916 = torch.aten.pow.Tensor_Scalar %915, %int2_768 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %916, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_769 = torch.constant.int -1 %917 = torch.prim.ListConstruct %int-1_769 : (!torch.int) -> !torch.list<int> %true_770 = torch.constant.bool true %none_771 = torch.constant.none %918 = torch.aten.mean.dim %916, %917, %true_770, %none_771 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %918, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_772 = torch.constant.float 9.9999997473787516E-6 %int1_773 = torch.constant.int 1 %919 = torch.aten.add.Scalar %918, %float9.999990e-06_772, %int1_773 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %919, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %920 = torch.aten.rsqrt %919 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %920, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %921 = torch.aten.mul.Tensor %915, 
%920 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %921, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %922 = torch.aten.mul.Tensor %28, %921 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %922, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_774 = torch.constant.int 5 %923 = torch.prims.convert_element_type %922, %int5_774 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %923, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_775 = torch.constant.int -2 %int-1_776 = torch.constant.int -1 %924 = torch.aten.transpose.int %29, %int-2_775, %int-1_776 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_777 = torch.constant.int 4 %925 = torch.aten.mul.int %int4_777, %294 : !torch.int, !torch.int -> !torch.int %int4096_778 = torch.constant.int 4096 %926 = torch.prim.ListConstruct %925, %int4096_778 : (!torch.int, !torch.int) -> !torch.list<int> %927 = torch.aten.view %923, %926 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %927, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %928 = torch.aten.mm %927, %924 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %928, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_779 = torch.constant.int 4 %int4096_780 = torch.constant.int 4096 %929 = torch.prim.ListConstruct %int4_779, %294, %int4096_780 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %930 = torch.aten.view %928, %929 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %930, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_781 = torch.constant.int -2 %int-1_782 = torch.constant.int -1 %931 = torch.aten.transpose.int %30, %int-2_781, %int-1_782 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_783 = torch.constant.int 4 %932 = torch.aten.mul.int %int4_783, %294 : !torch.int, !torch.int -> !torch.int %int4096_784 = torch.constant.int 4096 %933 = torch.prim.ListConstruct %932, %int4096_784 : (!torch.int, !torch.int) -> !torch.list<int> %934 = torch.aten.view %923, %933 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %934, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %935 = torch.aten.mm %934, %931 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %935, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_785 = torch.constant.int 4 %int1024_786 = torch.constant.int 1024 %936 = torch.prim.ListConstruct %int4_785, %294, %int1024_786 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %937 = torch.aten.view %935, %936 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %937, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_787 = torch.constant.int -2 %int-1_788 = torch.constant.int -1 %938 = 
torch.aten.transpose.int %31, %int-2_787, %int-1_788 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_789 = torch.constant.int 4 %939 = torch.aten.mul.int %int4_789, %294 : !torch.int, !torch.int -> !torch.int %int4096_790 = torch.constant.int 4096 %940 = torch.prim.ListConstruct %939, %int4096_790 : (!torch.int, !torch.int) -> !torch.list<int> %941 = torch.aten.view %923, %940 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %941, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %942 = torch.aten.mm %941, %938 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %942, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_791 = torch.constant.int 4 %int1024_792 = torch.constant.int 1024 %943 = torch.prim.ListConstruct %int4_791, %294, %int1024_792 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %944 = torch.aten.view %942, %943 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %944, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_793 = torch.constant.int 4 %int32_794 = torch.constant.int 32 %int128_795 = torch.constant.int 128 %945 = torch.prim.ListConstruct %int4_793, %294, %int32_794, %int128_795 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %946 = torch.aten.view %930, %945 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %946, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_796 = torch.constant.int 4 %int8_797 = torch.constant.int 8 %int128_798 = torch.constant.int 128 %947 = torch.prim.ListConstruct %int4_796, %294, %int8_797, %int128_798 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %948 = torch.aten.view %937, %947 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %948, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_799 = torch.constant.int 4 %int8_800 = torch.constant.int 8 %int128_801 = torch.constant.int 128 %949 = torch.prim.ListConstruct %int4_799, %294, %int8_800, %int128_801 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %950 = torch.aten.view %944, %949 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %950, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_802 = torch.constant.int 131072 %none_803 = torch.constant.none %none_804 = torch.constant.none %cpu_805 = torch.constant.device "cpu" %false_806 = torch.constant.bool false %951 = torch.aten.arange %int131072_802, %none_803, %none_804, %cpu_805, %false_806 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_807 = torch.constant.int 0 %int128_808 = torch.constant.int 128 %int2_809 = torch.constant.int 2 %none_810 = torch.constant.none %none_811 = torch.constant.none %cpu_812 = torch.constant.device "cpu" %false_813 = torch.constant.bool false %952 = torch.aten.arange.start_step %int0_807, %int128_808, %int2_809, %none_810, %none_811, %cpu_812, %false_813 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, 
!torch.bool -> !torch.vtensor<[64],si64> %int0_814 = torch.constant.int 0 %int0_815 = torch.constant.int 0 %int64_816 = torch.constant.int 64 %int1_817 = torch.constant.int 1 %953 = torch.aten.slice.Tensor %952, %int0_814, %int0_815, %int64_816, %int1_817 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_818 = torch.constant.int 6 %954 = torch.prims.convert_element_type %953, %int6_818 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_819 = torch.constant.int 128 %955 = torch.aten.div.Scalar %954, %int128_819 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_820 = torch.constant.float 5.000000e+05 %956 = torch.aten.pow.Scalar %float5.000000e05_820, %955 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %957 = torch.aten.reciprocal %956 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_821 = torch.constant.float 1.000000e+00 %958 = torch.aten.mul.Scalar %957, %float1.000000e00_821 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_822 = torch.constant.int 131072 %int1_823 = torch.constant.int 1 %959 = torch.prim.ListConstruct %int131072_822, %int1_823 : (!torch.int, !torch.int) -> !torch.list<int> %960 = torch.aten.view %951, %959 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %961 = torch.aten.mul.Tensor %960, %958 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %962 = torch.aten.cos %961 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %963 = torch.aten.sin %961 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %964 = torch.aten.complex %962, %963 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_824 = torch.constant.int 1 %965 = torch.aten.size.int %930, %int1_824 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_825 = torch.constant.int 0 %966 = torch.aten.add.int %int0_825, %965 : !torch.int, !torch.int -> !torch.int %int0_826 = torch.constant.int 0 %int0_827 = torch.constant.int 0 %int1_828 = torch.constant.int 1 %967 = torch.aten.slice.Tensor %964, %int0_826, %int0_827, %966, %int1_828 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %967, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_829 = torch.constant.int 1 %int0_830 = torch.constant.int 0 %int9223372036854775807_831 = torch.constant.int 9223372036854775807 %int1_832 = torch.constant.int 1 %968 = torch.aten.slice.Tensor %967, %int1_829, %int0_830, %int9223372036854775807_831, %int1_832 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %968, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_833 = torch.constant.int 0 %969 = torch.aten.unsqueeze %968, %int0_833 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %969, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_834 = torch.constant.int 2 %970 = torch.aten.unsqueeze %969, %int2_834 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> 
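// %951..%970 build the rotary-embedding (RoPE) table: %952..%958 compute the
// inverse frequencies 1 / theta^(i/128) for even i = 0..126, with
// theta = 5.0e+05 (a rope_theta consistent with Llama 3); %961 takes the
// outer product with the 131072 position indices, %962..%964 pack cos/sin
// into a complex tensor, and %967..%970 slice it to the current sequence
// length and reshape it to [1, seq, 1, 64] so it broadcasts over batch and
// heads. Illustrative sketch of the frequency math (not part of the module):
//   inv_freq = 1.0 / (500000.0 ** (arange(0, 128, 2) / 128))   # [64]
//   angles   = positions[:, None] * inv_freq                   # [131072, 64]
//   table    = cos(angles) + 1j * sin(angles)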
torch.bind_symbolic_shape %970, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_835 = torch.constant.int 3 %int0_836 = torch.constant.int 0 %int9223372036854775807_837 = torch.constant.int 9223372036854775807 %int1_838 = torch.constant.int 1 %971 = torch.aten.slice.Tensor %970, %int3_835, %int0_836, %int9223372036854775807_837, %int1_838 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %971, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %972 = torch_c.to_builtin_tensor %946 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_839 = arith.constant 1 : index %dim_840 = tensor.dim %972, %c1_839 : tensor<4x?x32x128xf16> %973 = flow.tensor.bitcast %972 : tensor<4x?x32x128xf16>{%dim_840} -> tensor<4x?x32x64xcomplex<f16>>{%dim_840} %974 = torch_c.from_builtin_tensor %973 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %974, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %975 = torch.aten.mul.Tensor %974, %971 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %975, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %976 = torch_c.to_builtin_tensor %975 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_841 = arith.constant 1 : index %dim_842 = tensor.dim %976, %c1_841 : tensor<4x?x32x64xcomplex<f32>> %977 = flow.tensor.bitcast %976 : tensor<4x?x32x64xcomplex<f32>>{%dim_842} -> tensor<4x?x32x128xf32>{%dim_842} %978 = torch_c.from_builtin_tensor %977 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %978, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_843 = torch.constant.int 5 %979 = torch.prims.convert_element_type %978, %int5_843 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %979, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_844 = torch.constant.int 131072 %none_845 = torch.constant.none %none_846 = torch.constant.none %cpu_847 = torch.constant.device "cpu" %false_848 = torch.constant.bool false %980 = torch.aten.arange %int131072_844, %none_845, %none_846, %cpu_847, %false_848 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_849 = torch.constant.int 0 %int128_850 = torch.constant.int 128 %int2_851 = torch.constant.int 2 %none_852 = torch.constant.none %none_853 = torch.constant.none %cpu_854 = torch.constant.device "cpu" %false_855 = torch.constant.bool false %981 = torch.aten.arange.start_step %int0_849, %int128_850, %int2_851, %none_852, %none_853, %cpu_854, %false_855 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_856 = torch.constant.int 0 %int0_857 = torch.constant.int 0 %int64_858 = torch.constant.int 64 %int1_859 = torch.constant.int 1 %982 = torch.aten.slice.Tensor %981, %int0_856, %int0_857, %int64_858, %int1_859 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_860 = torch.constant.int 6 %983 = 
torch.prims.convert_element_type %982, %int6_860 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_861 = torch.constant.int 128 %984 = torch.aten.div.Scalar %983, %int128_861 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_862 = torch.constant.float 5.000000e+05 %985 = torch.aten.pow.Scalar %float5.000000e05_862, %984 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %986 = torch.aten.reciprocal %985 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_863 = torch.constant.float 1.000000e+00 %987 = torch.aten.mul.Scalar %986, %float1.000000e00_863 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_864 = torch.constant.int 131072 %int1_865 = torch.constant.int 1 %988 = torch.prim.ListConstruct %int131072_864, %int1_865 : (!torch.int, !torch.int) -> !torch.list<int> %989 = torch.aten.view %980, %988 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %990 = torch.aten.mul.Tensor %989, %987 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %991 = torch.aten.cos %990 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %992 = torch.aten.sin %990 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %993 = torch.aten.complex %991, %992 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_866 = torch.constant.int 1 %994 = torch.aten.size.int %937, %int1_866 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_867 = torch.constant.int 0 %995 = torch.aten.add.int %int0_867, %994 : !torch.int, !torch.int -> !torch.int %int0_868 = torch.constant.int 0 %int0_869 = torch.constant.int 0 %int1_870 = torch.constant.int 1 %996 = torch.aten.slice.Tensor %993, %int0_868, %int0_869, %995, %int1_870 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %996, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_871 = torch.constant.int 1 %int0_872 = torch.constant.int 0 %int9223372036854775807_873 = torch.constant.int 9223372036854775807 %int1_874 = torch.constant.int 1 %997 = torch.aten.slice.Tensor %996, %int1_871, %int0_872, %int9223372036854775807_873, %int1_874 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %997, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_875 = torch.constant.int 0 %998 = torch.aten.unsqueeze %997, %int0_875 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %998, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_876 = torch.constant.int 2 %999 = torch.aten.unsqueeze %998, %int2_876 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %999, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_877 = torch.constant.int 3 %int0_878 = torch.constant.int 0 %int9223372036854775807_879 = torch.constant.int 9223372036854775807 %int1_880 = torch.constant.int 1 %1000 = torch.aten.slice.Tensor %999, %int3_877, %int0_878, %int9223372036854775807_879, %int1_880 : 
!torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1000, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1001 = torch_c.to_builtin_tensor %948 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_881 = arith.constant 1 : index %dim_882 = tensor.dim %1001, %c1_881 : tensor<4x?x8x128xf16> %1002 = flow.tensor.bitcast %1001 : tensor<4x?x8x128xf16>{%dim_882} -> tensor<4x?x8x64xcomplex<f16>>{%dim_882} %1003 = torch_c.from_builtin_tensor %1002 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %1003, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %1004 = torch.aten.mul.Tensor %1003, %1000 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %1004, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %1005 = torch_c.to_builtin_tensor %1004 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_883 = arith.constant 1 : index %dim_884 = tensor.dim %1005, %c1_883 : tensor<4x?x8x64xcomplex<f32>> %1006 = flow.tensor.bitcast %1005 : tensor<4x?x8x64xcomplex<f32>>{%dim_884} -> tensor<4x?x8x128xf32>{%dim_884} %1007 = torch_c.from_builtin_tensor %1006 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %1007, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_885 = torch.constant.int 5 %1008 = torch.prims.convert_element_type %1007, %int5_885 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1008, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_886 = torch.constant.int 64 %1009 = torch.aten.mul.Scalar %arg2, %int64_886 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1009, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int6_887 = torch.constant.int 6 %int1_888 = torch.constant.int 1 %1010 = torch.aten.add.Scalar %1009, %int6_887, %int1_888 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1010, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_889 = torch.constant.int 4 %int32_890 = torch.constant.int 32 %int8_891 = torch.constant.int 8 %int128_892 = torch.constant.int 128 %1011 = torch.prim.ListConstruct %int4_889, %425, %int32_890, %int8_891, %int128_892 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1012 = torch.aten.view %1008, %1011 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1012, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_893 = torch.constant.int 4 %1013 = torch.aten.mul.int %int4_893, %425 : !torch.int, !torch.int -> !torch.int %int32_894 = torch.constant.int 32 %int8_895 = torch.constant.int 8 %int128_896 = torch.constant.int 128 %1014 = torch.prim.ListConstruct %1013, %int32_894, %int8_895, %int128_896 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1015 = torch.aten.view %1012, %1014 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> 
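// %1001..%1008 apply RoPE to the key states by bitcasting the f16 tensor
// [4, seq, 8, 128] to complex<f16> [4, seq, 8, 64], multiplying by the
// complex rotation table, and bitcasting back; the same bitcast/multiply
// pattern was applied to the query states in %972..%979. %1009/%1010 then
// compute flat slot indices into the shared paged KV cache (%852): each page
// holds 64 slots, and the constant offset 6 here (with a +1 for the value
// slot below) is consistent with slot = page * 64 + layer * 2 + {0 for K,
// 1 for V} at layer index 3; this layout is inferred from the view shapes
// ([pages, 32, 2, 32, 8, 128]), not stated in the IR. The index_put below
// scatters these keys into the cache viewed as [pages * 64, 32, 8, 128].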
torch.bind_symbolic_shape %1015, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_897 = torch.constant.int 4 %1016 = torch.aten.mul.int %int4_897, %425 : !torch.int, !torch.int -> !torch.int %1017 = torch.prim.ListConstruct %1016 : (!torch.int) -> !torch.list<int> %1018 = torch.aten.view %1010, %1017 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %1018, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_898 = torch.constant.int 32 %int2_899 = torch.constant.int 2 %int32_900 = torch.constant.int 32 %int8_901 = torch.constant.int 8 %int128_902 = torch.constant.int 128 %1019 = torch.prim.ListConstruct %416, %int32_898, %int2_899, %int32_900, %int8_901, %int128_902 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1020 = torch.aten.view %852, %1019 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1020, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_903 = torch.constant.int 32 %1021 = torch.aten.mul.int %416, %int32_903 : !torch.int, !torch.int -> !torch.int %int2_904 = torch.constant.int 2 %1022 = torch.aten.mul.int %1021, %int2_904 : !torch.int, !torch.int -> !torch.int %int32_905 = torch.constant.int 32 %int8_906 = torch.constant.int 8 %int128_907 = torch.constant.int 128 %1023 = torch.prim.ListConstruct %1022, %int32_905, %int8_906, %int128_907 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1024 = torch.aten.view %1020, %1023 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1024, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %1025 = torch.prim.ListConstruct %1018 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_908 = torch.constant.bool false %1026 = torch.aten.index_put %1024, %1025, %1015, %false_908 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1026, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_909 = torch.constant.int 32 %int2_910 = torch.constant.int 2 %int32_911 = torch.constant.int 32 %int8_912 = torch.constant.int 8 %int128_913 = torch.constant.int 128 %1027 = torch.prim.ListConstruct %416, %int32_909, %int2_910, %int32_911, %int8_912, %int128_913 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1028 = torch.aten.view %1026, %1027 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1028, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_914 = torch.constant.int 2097152 %1029 = torch.prim.ListConstruct %416, %int2097152_914 : (!torch.int, !torch.int) -> !torch.list<int> %1030 = torch.aten.view %1028, %1029 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1030, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_915 = torch.constant.int 32 %int2_916 = torch.constant.int 2 %int32_917 = torch.constant.int 32 %int8_918 = torch.constant.int 8 %int128_919 = torch.constant.int 128 %1031 = 
torch.prim.ListConstruct %416, %int32_915, %int2_916, %int32_917, %int8_918, %int128_919 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1032 = torch.aten.view %1030, %1031 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1032, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_920 = torch.constant.int 32 %int8_921 = torch.constant.int 8 %int128_922 = torch.constant.int 128 %1033 = torch.prim.ListConstruct %1022, %int32_920, %int8_921, %int128_922 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1034 = torch.aten.view %1032, %1033 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1034, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_923 = torch.constant.int 4 %int32_924 = torch.constant.int 32 %int8_925 = torch.constant.int 8 %int128_926 = torch.constant.int 128 %1035 = torch.prim.ListConstruct %int4_923, %425, %int32_924, %int8_925, %int128_926 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1036 = torch.aten.view %950, %1035 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1036, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_927 = torch.constant.int 4 %1037 = torch.aten.mul.int %int4_927, %425 : !torch.int, !torch.int -> !torch.int %int32_928 = torch.constant.int 32 %int8_929 = torch.constant.int 8 %int128_930 = torch.constant.int 128 %1038 = torch.prim.ListConstruct %1037, %int32_928, %int8_929, %int128_930 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1039 = torch.aten.view %1036, %1038 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1039, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_931 = torch.constant.int 1 %int1_932 = torch.constant.int 1 %1040 = torch.aten.add.Scalar %1010, %int1_931, %int1_932 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1040, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_933 = torch.constant.int 4 %1041 = torch.aten.mul.int %int4_933, %425 : !torch.int, !torch.int -> !torch.int %1042 = torch.prim.ListConstruct %1041 : (!torch.int) -> !torch.list<int> %1043 = torch.aten.view %1040, %1042 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %1043, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %1044 = torch.prim.ListConstruct %1043 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_934 = torch.constant.bool false %1045 = torch.aten.index_put %1034, %1044, %1039, %false_934 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1045, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_935 = torch.constant.int 32 %int2_936 = torch.constant.int 2 %int32_937 = torch.constant.int 32 %int8_938 = torch.constant.int 8 %int128_939 = torch.constant.int 128 %1046 = torch.prim.ListConstruct %416, %int32_935, %int2_936, %int32_937, %int8_938, 
%int128_939 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1047 = torch.aten.view %1045, %1046 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1047, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_940 = torch.constant.int 2097152 %1048 = torch.prim.ListConstruct %416, %int2097152_940 : (!torch.int, !torch.int) -> !torch.list<int> %1049 = torch.aten.view %1047, %1048 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1049, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_941 = torch.constant.int -2 %1050 = torch.aten.unsqueeze %1008, %int-2_941 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1050, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_942 = torch.constant.int 4 %int8_943 = torch.constant.int 8 %int4_944 = torch.constant.int 4 %int128_945 = torch.constant.int 128 %1051 = torch.prim.ListConstruct %int4_942, %994, %int8_943, %int4_944, %int128_945 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_946 = torch.constant.bool false %1052 = torch.aten.expand %1050, %1051, %false_946 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1052, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_947 = torch.constant.int 0 %1053 = torch.aten.clone %1052, %int0_947 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1053, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_948 = torch.constant.int 4 %int32_949 = torch.constant.int 32 %int128_950 = torch.constant.int 128 %1054 = torch.prim.ListConstruct %int4_948, %994, %int32_949, %int128_950 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1055 = torch.aten._unsafe_view %1053, %1054 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1055, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_951 = torch.constant.int -2 %1056 = torch.aten.unsqueeze %950, %int-2_951 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1056, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_952 = torch.constant.int 1 %1057 = torch.aten.size.int %944, %int1_952 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_953 = torch.constant.int 4 %int8_954 = torch.constant.int 8 %int4_955 = torch.constant.int 4 %int128_956 = torch.constant.int 128 %1058 = torch.prim.ListConstruct %int4_953, %1057, %int8_954, %int4_955, %int128_956 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_957 = torch.constant.bool false %1059 = torch.aten.expand %1056, %1058, %false_957 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1059, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_958 = torch.constant.int 0 %1060 = torch.aten.clone 
%1059, %int0_958 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1060, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_959 = torch.constant.int 4 %int32_960 = torch.constant.int 32 %int128_961 = torch.constant.int 128 %1061 = torch.prim.ListConstruct %int4_959, %1057, %int32_960, %int128_961 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1062 = torch.aten._unsafe_view %1060, %1061 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1062, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_962 = torch.constant.int 1 %int2_963 = torch.constant.int 2 %1063 = torch.aten.transpose.int %979, %int1_962, %int2_963 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1063, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_964 = torch.constant.int 1 %int2_965 = torch.constant.int 2 %1064 = torch.aten.transpose.int %1055, %int1_964, %int2_965 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1064, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_966 = torch.constant.int 1 %int2_967 = torch.constant.int 2 %1065 = torch.aten.transpose.int %1062, %int1_966, %int2_967 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1065, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_968 = torch.constant.float 0.000000e+00 %false_969 = torch.constant.bool false %none_970 = torch.constant.none %1066:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%1063, %1064, %1065, %float0.000000e00_968, %false_969, %320, %none_970) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %1066#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_971 = torch.constant.int 1 %int2_972 = torch.constant.int 2 %1067 = torch.aten.transpose.int %1066#0, %int1_971, %int2_972 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1067, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_973 = torch.constant.int 4 %int4096_974 = torch.constant.int 4096 %1068 = torch.prim.ListConstruct %int4_973, %965, %int4096_974 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1069 = torch.aten.view %1067, %1068 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1069, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_975 = torch.constant.int -2 %int-1_976 = torch.constant.int -1 %1070 = torch.aten.transpose.int %32, %int-2_975, %int-1_976 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_977 = torch.constant.int 4 %1071 = torch.aten.mul.int %int4_977, %965 : !torch.int, !torch.int -> !torch.int %int4096_978 = 
torch.constant.int 4096 %1072 = torch.prim.ListConstruct %1071, %int4096_978 : (!torch.int, !torch.int) -> !torch.list<int> %1073 = torch.aten.view %1069, %1072 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1073, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1074 = torch.aten.mm %1073, %1070 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1074, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_979 = torch.constant.int 4 %int4096_980 = torch.constant.int 4096 %1075 = torch.prim.ListConstruct %int4_979, %965, %int4096_980 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1076 = torch.aten.view %1074, %1075 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1076, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_981 = torch.constant.int 1 %1077 = torch.aten.add.Tensor %914, %1076, %int1_981 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1077, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_982 = torch.constant.int 6 %1078 = torch.prims.convert_element_type %1077, %int6_982 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1078, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_983 = torch.constant.int 2 %1079 = torch.aten.pow.Tensor_Scalar %1078, %int2_983 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1079, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_984 = torch.constant.int -1 %1080 = torch.prim.ListConstruct %int-1_984 : (!torch.int) -> !torch.list<int> %true_985 = torch.constant.bool true %none_986 = torch.constant.none %1081 = torch.aten.mean.dim %1079, %1080, %true_985, %none_986 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1081, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_987 = torch.constant.float 9.9999997473787516E-6 %int1_988 = torch.constant.int 1 %1082 = torch.aten.add.Scalar %1081, %float9.999990e-06_987, %int1_988 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1082, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1083 = torch.aten.rsqrt %1082 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1083, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1084 = torch.aten.mul.Tensor %1078, %1083 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1084, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1085 = torch.aten.mul.Tensor %33, %1084 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1085, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_989 = torch.constant.int 5 %1086 = torch.prims.convert_element_type %1085, %int5_989 : 
!torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1086, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_990 = torch.constant.int -2 %int-1_991 = torch.constant.int -1 %1087 = torch.aten.transpose.int %34, %int-2_990, %int-1_991 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_992 = torch.constant.int 4 %1088 = torch.aten.mul.int %int4_992, %294 : !torch.int, !torch.int -> !torch.int %int4096_993 = torch.constant.int 4096 %1089 = torch.prim.ListConstruct %1088, %int4096_993 : (!torch.int, !torch.int) -> !torch.list<int> %1090 = torch.aten.view %1086, %1089 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1090, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1091 = torch.aten.mm %1090, %1087 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1091, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_994 = torch.constant.int 4 %int14336_995 = torch.constant.int 14336 %1092 = torch.prim.ListConstruct %int4_994, %294, %int14336_995 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1093 = torch.aten.view %1091, %1092 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1093, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1094 = torch.aten.silu %1093 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1094, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_996 = torch.constant.int -2 %int-1_997 = torch.constant.int -1 %1095 = torch.aten.transpose.int %35, %int-2_996, %int-1_997 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_998 = torch.constant.int 4 %1096 = torch.aten.mul.int %int4_998, %294 : !torch.int, !torch.int -> !torch.int %int4096_999 = torch.constant.int 4096 %1097 = torch.prim.ListConstruct %1096, %int4096_999 : (!torch.int, !torch.int) -> !torch.list<int> %1098 = torch.aten.view %1086, %1097 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1098, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1099 = torch.aten.mm %1098, %1095 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1099, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_1000 = torch.constant.int 4 %int14336_1001 = torch.constant.int 14336 %1100 = torch.prim.ListConstruct %int4_1000, %294, %int14336_1001 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1101 = torch.aten.view %1099, %1100 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1101, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1102 = torch.aten.mul.Tensor %1094, %1101 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1102, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_1002 = torch.constant.int -2 %int-1_1003 = torch.constant.int -1 %1103 = 
torch.aten.transpose.int %36, %int-2_1002, %int-1_1003 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_1004 = torch.constant.int 1 %1104 = torch.aten.size.int %1093, %int1_1004 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_1005 = torch.constant.int 4 %1105 = torch.aten.mul.int %int4_1005, %1104 : !torch.int, !torch.int -> !torch.int %int14336_1006 = torch.constant.int 14336 %1106 = torch.prim.ListConstruct %1105, %int14336_1006 : (!torch.int, !torch.int) -> !torch.list<int> %1107 = torch.aten.view %1102, %1106 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1107, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %1108 = torch.aten.mm %1107, %1103 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1108, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1007 = torch.constant.int 4 %int4096_1008 = torch.constant.int 4096 %1109 = torch.prim.ListConstruct %int4_1007, %1104, %int4096_1008 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1110 = torch.aten.view %1108, %1109 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1110, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_1009 = torch.constant.int 1 %1111 = torch.aten.add.Tensor %1077, %1110, %int1_1009 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1111, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_1010 = torch.constant.int 6 %1112 = torch.prims.convert_element_type %1111, %int6_1010 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1112, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_1011 = torch.constant.int 2 %1113 = torch.aten.pow.Tensor_Scalar %1112, %int2_1011 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1113, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_1012 = torch.constant.int -1 %1114 = torch.prim.ListConstruct %int-1_1012 : (!torch.int) -> !torch.list<int> %true_1013 = torch.constant.bool true %none_1014 = torch.constant.none %1115 = torch.aten.mean.dim %1113, %1114, %true_1013, %none_1014 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1115, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_1015 = torch.constant.float 9.9999997473787516E-6 %int1_1016 = torch.constant.int 1 %1116 = torch.aten.add.Scalar %1115, %float9.999990e-06_1015, %int1_1016 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1116, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1117 = torch.aten.rsqrt %1116 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1117, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1118 = torch.aten.mul.Tensor %1112, %1117 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> 
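// %1108..%1111 finish this layer's FFN: the down projection (14336 -> 4096)
// followed by the residual add. %1112..%1118 then compute the next RMSNorm
// in f32, roughly
//   y = x / sqrt(mean(x^2, dim=-1) + eps),  eps ~= 1e-5
// (square %1113, mean over the last dim %1115, epsilon %1116, rsqrt %1117,
// multiply %1118); the elementwise scale by the norm weight (%37) and the
// cast back to f16 follow below.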
torch.bind_symbolic_shape %1118, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1119 = torch.aten.mul.Tensor %37, %1118 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1119, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_1017 = torch.constant.int 5 %1120 = torch.prims.convert_element_type %1119, %int5_1017 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1120, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1018 = torch.constant.int -2 %int-1_1019 = torch.constant.int -1 %1121 = torch.aten.transpose.int %38, %int-2_1018, %int-1_1019 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_1020 = torch.constant.int 4 %1122 = torch.aten.mul.int %int4_1020, %294 : !torch.int, !torch.int -> !torch.int %int4096_1021 = torch.constant.int 4096 %1123 = torch.prim.ListConstruct %1122, %int4096_1021 : (!torch.int, !torch.int) -> !torch.list<int> %1124 = torch.aten.view %1120, %1123 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1124, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1125 = torch.aten.mm %1124, %1121 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1125, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1022 = torch.constant.int 4 %int4096_1023 = torch.constant.int 4096 %1126 = torch.prim.ListConstruct %int4_1022, %294, %int4096_1023 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1127 = torch.aten.view %1125, %1126 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1127, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1024 = torch.constant.int -2 %int-1_1025 = torch.constant.int -1 %1128 = torch.aten.transpose.int %39, %int-2_1024, %int-1_1025 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_1026 = torch.constant.int 4 %1129 = torch.aten.mul.int %int4_1026, %294 : !torch.int, !torch.int -> !torch.int %int4096_1027 = torch.constant.int 4096 %1130 = torch.prim.ListConstruct %1129, %int4096_1027 : (!torch.int, !torch.int) -> !torch.list<int> %1131 = torch.aten.view %1120, %1130 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1131, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1132 = torch.aten.mm %1131, %1128 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1132, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_1028 = torch.constant.int 4 %int1024_1029 = torch.constant.int 1024 %1133 = torch.prim.ListConstruct %int4_1028, %294, %int1024_1029 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1134 = torch.aten.view %1132, %1133 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1134, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_1030 = torch.constant.int -2 %int-1_1031 = torch.constant.int -1 %1135 = torch.aten.transpose.int %40, 
%int-2_1030, %int-1_1031 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_1032 = torch.constant.int 4 %1136 = torch.aten.mul.int %int4_1032, %294 : !torch.int, !torch.int -> !torch.int %int4096_1033 = torch.constant.int 4096 %1137 = torch.prim.ListConstruct %1136, %int4096_1033 : (!torch.int, !torch.int) -> !torch.list<int> %1138 = torch.aten.view %1120, %1137 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1138, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1139 = torch.aten.mm %1138, %1135 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1139, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_1034 = torch.constant.int 4 %int1024_1035 = torch.constant.int 1024 %1140 = torch.prim.ListConstruct %int4_1034, %294, %int1024_1035 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1141 = torch.aten.view %1139, %1140 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1141, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_1036 = torch.constant.int 4 %int32_1037 = torch.constant.int 32 %int128_1038 = torch.constant.int 128 %1142 = torch.prim.ListConstruct %int4_1036, %294, %int32_1037, %int128_1038 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1143 = torch.aten.view %1127, %1142 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1143, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_1039 = torch.constant.int 4 %int8_1040 = torch.constant.int 8 %int128_1041 = torch.constant.int 128 %1144 = torch.prim.ListConstruct %int4_1039, %294, %int8_1040, %int128_1041 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1145 = torch.aten.view %1134, %1144 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1145, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_1042 = torch.constant.int 4 %int8_1043 = torch.constant.int 8 %int128_1044 = torch.constant.int 128 %1146 = torch.prim.ListConstruct %int4_1042, %294, %int8_1043, %int128_1044 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1147 = torch.aten.view %1141, %1146 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1147, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_1045 = torch.constant.int 131072 %none_1046 = torch.constant.none %none_1047 = torch.constant.none %cpu_1048 = torch.constant.device "cpu" %false_1049 = torch.constant.bool false %1148 = torch.aten.arange %int131072_1045, %none_1046, %none_1047, %cpu_1048, %false_1049 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_1050 = torch.constant.int 0 %int128_1051 = torch.constant.int 128 %int2_1052 = torch.constant.int 2 %none_1053 = torch.constant.none %none_1054 = torch.constant.none %cpu_1055 = torch.constant.device "cpu" %false_1056 = torch.constant.bool false %1149 = torch.aten.arange.start_step %int0_1050, %int128_1051, %int2_1052, %none_1053, %none_1054, %cpu_1055, %false_1056 : !torch.int, !torch.int, 
!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_1057 = torch.constant.int 0 %int0_1058 = torch.constant.int 0 %int64_1059 = torch.constant.int 64 %int1_1060 = torch.constant.int 1 %1150 = torch.aten.slice.Tensor %1149, %int0_1057, %int0_1058, %int64_1059, %int1_1060 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_1061 = torch.constant.int 6 %1151 = torch.prims.convert_element_type %1150, %int6_1061 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_1062 = torch.constant.int 128 %1152 = torch.aten.div.Scalar %1151, %int128_1062 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_1063 = torch.constant.float 5.000000e+05 %1153 = torch.aten.pow.Scalar %float5.000000e05_1063, %1152 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1154 = torch.aten.reciprocal %1153 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_1064 = torch.constant.float 1.000000e+00 %1155 = torch.aten.mul.Scalar %1154, %float1.000000e00_1064 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_1065 = torch.constant.int 131072 %int1_1066 = torch.constant.int 1 %1156 = torch.prim.ListConstruct %int131072_1065, %int1_1066 : (!torch.int, !torch.int) -> !torch.list<int> %1157 = torch.aten.view %1148, %1156 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1158 = torch.aten.mul.Tensor %1157, %1155 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1159 = torch.aten.cos %1158 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1160 = torch.aten.sin %1158 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1161 = torch.aten.complex %1159, %1160 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_1067 = torch.constant.int 1 %1162 = torch.aten.size.int %1127, %int1_1067 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_1068 = torch.constant.int 0 %1163 = torch.aten.add.int %int0_1068, %1162 : !torch.int, !torch.int -> !torch.int %int0_1069 = torch.constant.int 0 %int0_1070 = torch.constant.int 0 %int1_1071 = torch.constant.int 1 %1164 = torch.aten.slice.Tensor %1161, %int0_1069, %int0_1070, %1163, %int1_1071 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1164, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_1072 = torch.constant.int 1 %int0_1073 = torch.constant.int 0 %int9223372036854775807_1074 = torch.constant.int 9223372036854775807 %int1_1075 = torch.constant.int 1 %1165 = torch.aten.slice.Tensor %1164, %int1_1072, %int0_1073, %int9223372036854775807_1074, %int1_1075 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1165, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_1076 = torch.constant.int 0 %1166 = torch.aten.unsqueeze %1165, %int0_1076 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1166, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_1077 = torch.constant.int 2 %1167 = 
torch.aten.unsqueeze %1166, %int2_1077 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1167, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_1078 = torch.constant.int 3 %int0_1079 = torch.constant.int 0 %int9223372036854775807_1080 = torch.constant.int 9223372036854775807 %int1_1081 = torch.constant.int 1 %1168 = torch.aten.slice.Tensor %1167, %int3_1078, %int0_1079, %int9223372036854775807_1080, %int1_1081 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1168, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1169 = torch_c.to_builtin_tensor %1143 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_1082 = arith.constant 1 : index %dim_1083 = tensor.dim %1169, %c1_1082 : tensor<4x?x32x128xf16> %1170 = flow.tensor.bitcast %1169 : tensor<4x?x32x128xf16>{%dim_1083} -> tensor<4x?x32x64xcomplex<f16>>{%dim_1083} %1171 = torch_c.from_builtin_tensor %1170 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %1171, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %1172 = torch.aten.mul.Tensor %1171, %1168 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %1172, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %1173 = torch_c.to_builtin_tensor %1172 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_1084 = arith.constant 1 : index %dim_1085 = tensor.dim %1173, %c1_1084 : tensor<4x?x32x64xcomplex<f32>> %1174 = flow.tensor.bitcast %1173 : tensor<4x?x32x64xcomplex<f32>>{%dim_1085} -> tensor<4x?x32x128xf32>{%dim_1085} %1175 = torch_c.from_builtin_tensor %1174 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %1175, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_1086 = torch.constant.int 5 %1176 = torch.prims.convert_element_type %1175, %int5_1086 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1176, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_1087 = torch.constant.int 131072 %none_1088 = torch.constant.none %none_1089 = torch.constant.none %cpu_1090 = torch.constant.device "cpu" %false_1091 = torch.constant.bool false %1177 = torch.aten.arange %int131072_1087, %none_1088, %none_1089, %cpu_1090, %false_1091 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_1092 = torch.constant.int 0 %int128_1093 = torch.constant.int 128 %int2_1094 = torch.constant.int 2 %none_1095 = torch.constant.none %none_1096 = torch.constant.none %cpu_1097 = torch.constant.device "cpu" %false_1098 = torch.constant.bool false %1178 = torch.aten.arange.start_step %int0_1092, %int128_1093, %int2_1094, %none_1095, %none_1096, %cpu_1097, %false_1098 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_1099 = torch.constant.int 0 %int0_1100 = torch.constant.int 0 %int64_1101 = torch.constant.int 64 %int1_1102 = torch.constant.int 1 %1179 = 
torch.aten.slice.Tensor %1178, %int0_1099, %int0_1100, %int64_1101, %int1_1102 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_1103 = torch.constant.int 6 %1180 = torch.prims.convert_element_type %1179, %int6_1103 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_1104 = torch.constant.int 128 %1181 = torch.aten.div.Scalar %1180, %int128_1104 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_1105 = torch.constant.float 5.000000e+05 %1182 = torch.aten.pow.Scalar %float5.000000e05_1105, %1181 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1183 = torch.aten.reciprocal %1182 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_1106 = torch.constant.float 1.000000e+00 %1184 = torch.aten.mul.Scalar %1183, %float1.000000e00_1106 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_1107 = torch.constant.int 131072 %int1_1108 = torch.constant.int 1 %1185 = torch.prim.ListConstruct %int131072_1107, %int1_1108 : (!torch.int, !torch.int) -> !torch.list<int> %1186 = torch.aten.view %1177, %1185 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1187 = torch.aten.mul.Tensor %1186, %1184 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1188 = torch.aten.cos %1187 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1189 = torch.aten.sin %1187 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1190 = torch.aten.complex %1188, %1189 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_1109 = torch.constant.int 1 %1191 = torch.aten.size.int %1134, %int1_1109 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_1110 = torch.constant.int 0 %1192 = torch.aten.add.int %int0_1110, %1191 : !torch.int, !torch.int -> !torch.int %int0_1111 = torch.constant.int 0 %int0_1112 = torch.constant.int 0 %int1_1113 = torch.constant.int 1 %1193 = torch.aten.slice.Tensor %1190, %int0_1111, %int0_1112, %1192, %int1_1113 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1193, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_1114 = torch.constant.int 1 %int0_1115 = torch.constant.int 0 %int9223372036854775807_1116 = torch.constant.int 9223372036854775807 %int1_1117 = torch.constant.int 1 %1194 = torch.aten.slice.Tensor %1193, %int1_1114, %int0_1115, %int9223372036854775807_1116, %int1_1117 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1194, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_1118 = torch.constant.int 0 %1195 = torch.aten.unsqueeze %1194, %int0_1118 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1195, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_1119 = torch.constant.int 2 %1196 = torch.aten.unsqueeze %1195, %int2_1119 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1196, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> 
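// NOTE (annotation): %1148..%1168 and the mirror copy %1177..%1197 materialize
// the rotary-embedding table for the Q and K paths. A PyTorch-level sketch of
// what these ops compute (names are illustrative, not from the module):
//   inv_freq = 1.0 / (500000.0 ** (torch.arange(0, 128, 2).float() / 128))
//   angles   = torch.arange(131072)[:, None].float() * inv_freq   // [131072, 64]
//   cis      = torch.complex(angles.cos(), angles.sin())
// The table is sliced to the current sequence length and broadcast to
// [1, s, 1, 64]. Application below bitcasts adjacent f16 pairs of the
// [.., 128] head dimension to [.., 64] complex<f16> and multiplies by cis,
// i.e. a rotation of each (even, odd) feature pair.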
%int3_1120 = torch.constant.int 3 %int0_1121 = torch.constant.int 0 %int9223372036854775807_1122 = torch.constant.int 9223372036854775807 %int1_1123 = torch.constant.int 1 %1197 = torch.aten.slice.Tensor %1196, %int3_1120, %int0_1121, %int9223372036854775807_1122, %int1_1123 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1197, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1198 = torch_c.to_builtin_tensor %1145 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_1124 = arith.constant 1 : index %dim_1125 = tensor.dim %1198, %c1_1124 : tensor<4x?x8x128xf16> %1199 = flow.tensor.bitcast %1198 : tensor<4x?x8x128xf16>{%dim_1125} -> tensor<4x?x8x64xcomplex<f16>>{%dim_1125} %1200 = torch_c.from_builtin_tensor %1199 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %1200, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %1201 = torch.aten.mul.Tensor %1200, %1197 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %1201, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %1202 = torch_c.to_builtin_tensor %1201 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_1126 = arith.constant 1 : index %dim_1127 = tensor.dim %1202, %c1_1126 : tensor<4x?x8x64xcomplex<f32>> %1203 = flow.tensor.bitcast %1202 : tensor<4x?x8x64xcomplex<f32>>{%dim_1127} -> tensor<4x?x8x128xf32>{%dim_1127} %1204 = torch_c.from_builtin_tensor %1203 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %1204, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_1128 = torch.constant.int 5 %1205 = torch.prims.convert_element_type %1204, %int5_1128 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1205, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_1129 = torch.constant.int 64 %1206 = torch.aten.mul.Scalar %arg2, %int64_1129 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1206, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int8_1130 = torch.constant.int 8 %int1_1131 = torch.constant.int 1 %1207 = torch.aten.add.Scalar %1206, %int8_1130, %int1_1131 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1207, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_1132 = torch.constant.int 4 %int32_1133 = torch.constant.int 32 %int8_1134 = torch.constant.int 8 %int128_1135 = torch.constant.int 128 %1208 = torch.prim.ListConstruct %int4_1132, %425, %int32_1133, %int8_1134, %int128_1135 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1209 = torch.aten.view %1205, %1208 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1209, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_1136 = torch.constant.int 4 %1210 = torch.aten.mul.int %int4_1136, %425 : !torch.int, !torch.int -> !torch.int %int32_1137 = torch.constant.int 32 %int8_1138 = torch.constant.int 8 %int128_1139 
= torch.constant.int 128 %1211 = torch.prim.ListConstruct %1210, %int32_1137, %int8_1138, %int128_1139 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1212 = torch.aten.view %1209, %1211 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1212, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_1140 = torch.constant.int 4 %1213 = torch.aten.mul.int %int4_1140, %425 : !torch.int, !torch.int -> !torch.int %1214 = torch.prim.ListConstruct %1213 : (!torch.int) -> !torch.list<int> %1215 = torch.aten.view %1207, %1214 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %1215, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_1141 = torch.constant.int 32 %int2_1142 = torch.constant.int 2 %int32_1143 = torch.constant.int 32 %int8_1144 = torch.constant.int 8 %int128_1145 = torch.constant.int 128 %1216 = torch.prim.ListConstruct %416, %int32_1141, %int2_1142, %int32_1143, %int8_1144, %int128_1145 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1217 = torch.aten.view %1049, %1216 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1217, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_1146 = torch.constant.int 32 %1218 = torch.aten.mul.int %416, %int32_1146 : !torch.int, !torch.int -> !torch.int %int2_1147 = torch.constant.int 2 %1219 = torch.aten.mul.int %1218, %int2_1147 : !torch.int, !torch.int -> !torch.int %int32_1148 = torch.constant.int 32 %int8_1149 = torch.constant.int 8 %int128_1150 = torch.constant.int 128 %1220 = torch.prim.ListConstruct %1219, %int32_1148, %int8_1149, %int128_1150 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1221 = torch.aten.view %1217, %1220 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1221, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %1222 = torch.prim.ListConstruct %1215 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_1151 = torch.constant.bool false %1223 = torch.aten.index_put %1221, %1222, %1212, %false_1151 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1223, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_1152 = torch.constant.int 32 %int2_1153 = torch.constant.int 2 %int32_1154 = torch.constant.int 32 %int8_1155 = torch.constant.int 8 %int128_1156 = torch.constant.int 128 %1224 = torch.prim.ListConstruct %416, %int32_1152, %int2_1153, %int32_1154, %int8_1155, %int128_1156 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1225 = torch.aten.view %1223, %1224 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1225, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_1157 = torch.constant.int 2097152 %1226 = torch.prim.ListConstruct %416, %int2097152_1157 : (!torch.int, !torch.int) -> !torch.list<int> %1227 = torch.aten.view %1225, %1226 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> 
!torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1227, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_1158 = torch.constant.int 32 %int2_1159 = torch.constant.int 2 %int32_1160 = torch.constant.int 32 %int8_1161 = torch.constant.int 8 %int128_1162 = torch.constant.int 128 %1228 = torch.prim.ListConstruct %416, %int32_1158, %int2_1159, %int32_1160, %int8_1161, %int128_1162 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1229 = torch.aten.view %1227, %1228 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1229, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_1163 = torch.constant.int 32 %int8_1164 = torch.constant.int 8 %int128_1165 = torch.constant.int 128 %1230 = torch.prim.ListConstruct %1219, %int32_1163, %int8_1164, %int128_1165 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1231 = torch.aten.view %1229, %1230 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1231, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_1166 = torch.constant.int 4 %int32_1167 = torch.constant.int 32 %int8_1168 = torch.constant.int 8 %int128_1169 = torch.constant.int 128 %1232 = torch.prim.ListConstruct %int4_1166, %425, %int32_1167, %int8_1168, %int128_1169 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1233 = torch.aten.view %1147, %1232 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1233, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_1170 = torch.constant.int 4 %1234 = torch.aten.mul.int %int4_1170, %425 : !torch.int, !torch.int -> !torch.int %int32_1171 = torch.constant.int 32 %int8_1172 = torch.constant.int 8 %int128_1173 = torch.constant.int 128 %1235 = torch.prim.ListConstruct %1234, %int32_1171, %int8_1172, %int128_1173 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1236 = torch.aten.view %1233, %1235 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1236, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_1174 = torch.constant.int 1 %int1_1175 = torch.constant.int 1 %1237 = torch.aten.add.Scalar %1207, %int1_1174, %int1_1175 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1237, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_1176 = torch.constant.int 4 %1238 = torch.aten.mul.int %int4_1176, %425 : !torch.int, !torch.int -> !torch.int %1239 = torch.prim.ListConstruct %1238 : (!torch.int) -> !torch.list<int> %1240 = torch.aten.view %1237, %1239 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %1240, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %1241 = torch.prim.ListConstruct %1240 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_1177 = torch.constant.bool false %1242 = torch.aten.index_put %1231, %1241, %1236, %false_1177 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> 
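// NOTE (annotation, layout is an inferred reading): %1206..%1246 write this
// layer's K and V into the shared paged KV cache %1049. The flat
// [pages, 2097152] buffer is viewed as [pages, 32, 2, 32, 8, 128]
// (page, layer, K/V, position-in-page, kv-head, head-dim), flattened to
// [pages*64, 32, 8, 128], and rows are selected by %arg2 * 64 + offset;
// under that reading the offsets 8 and 9 used here are layer*2 and
// layer*2 + 1 for layer 4. index_put scatters the new K (%1212) and
// V (%1236) pages, and the buffer is viewed back to [pages, 2097152].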
torch.bind_symbolic_shape %1242, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_1178 = torch.constant.int 32 %int2_1179 = torch.constant.int 2 %int32_1180 = torch.constant.int 32 %int8_1181 = torch.constant.int 8 %int128_1182 = torch.constant.int 128 %1243 = torch.prim.ListConstruct %416, %int32_1178, %int2_1179, %int32_1180, %int8_1181, %int128_1182 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1244 = torch.aten.view %1242, %1243 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1244, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_1183 = torch.constant.int 2097152 %1245 = torch.prim.ListConstruct %416, %int2097152_1183 : (!torch.int, !torch.int) -> !torch.list<int> %1246 = torch.aten.view %1244, %1245 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1246, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_1184 = torch.constant.int -2 %1247 = torch.aten.unsqueeze %1205, %int-2_1184 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1247, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_1185 = torch.constant.int 4 %int8_1186 = torch.constant.int 8 %int4_1187 = torch.constant.int 4 %int128_1188 = torch.constant.int 128 %1248 = torch.prim.ListConstruct %int4_1185, %1191, %int8_1186, %int4_1187, %int128_1188 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_1189 = torch.constant.bool false %1249 = torch.aten.expand %1247, %1248, %false_1189 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1249, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_1190 = torch.constant.int 0 %1250 = torch.aten.clone %1249, %int0_1190 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1250, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_1191 = torch.constant.int 4 %int32_1192 = torch.constant.int 32 %int128_1193 = torch.constant.int 128 %1251 = torch.prim.ListConstruct %int4_1191, %1191, %int32_1192, %int128_1193 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1252 = torch.aten._unsafe_view %1250, %1251 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1252, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_1194 = torch.constant.int -2 %1253 = torch.aten.unsqueeze %1147, %int-2_1194 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1253, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_1195 = torch.constant.int 1 %1254 = torch.aten.size.int %1141, %int1_1195 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_1196 = torch.constant.int 4 %int8_1197 = torch.constant.int 8 %int4_1198 = torch.constant.int 4 %int128_1199 = torch.constant.int 128 %1255 = torch.prim.ListConstruct %int4_1196, %1254, %int8_1197, %int4_1198, %int128_1199 : (!torch.int, !torch.int, 
!torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_1200 = torch.constant.bool false %1256 = torch.aten.expand %1253, %1255, %false_1200 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1256, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_1201 = torch.constant.int 0 %1257 = torch.aten.clone %1256, %int0_1201 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1257, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_1202 = torch.constant.int 4 %int32_1203 = torch.constant.int 32 %int128_1204 = torch.constant.int 128 %1258 = torch.prim.ListConstruct %int4_1202, %1254, %int32_1203, %int128_1204 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1259 = torch.aten._unsafe_view %1257, %1258 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1259, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_1205 = torch.constant.int 1 %int2_1206 = torch.constant.int 2 %1260 = torch.aten.transpose.int %1176, %int1_1205, %int2_1206 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1260, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1207 = torch.constant.int 1 %int2_1208 = torch.constant.int 2 %1261 = torch.aten.transpose.int %1252, %int1_1207, %int2_1208 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1261, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1209 = torch.constant.int 1 %int2_1210 = torch.constant.int 2 %1262 = torch.aten.transpose.int %1259, %int1_1209, %int2_1210 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1262, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_1211 = torch.constant.float 0.000000e+00 %false_1212 = torch.constant.bool false %none_1213 = torch.constant.none %1263:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%1260, %1261, %1262, %float0.000000e00_1211, %false_1212, %320, %none_1213) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %1263#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1214 = torch.constant.int 1 %int2_1215 = torch.constant.int 2 %1264 = torch.aten.transpose.int %1263#0, %int1_1214, %int2_1215 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1264, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_1216 = torch.constant.int 4 %int4096_1217 = torch.constant.int 4096 %1265 = torch.prim.ListConstruct %int4_1216, %1162, %int4096_1217 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1266 = torch.aten.view %1264, %1265 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> 
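// NOTE (annotation): %1247..%1259 implement the grouped-query-attention head
// expansion: the 8 KV heads are unsqueezed, expanded to [4, s, 8, 4, 128],
// and reshaped to 32 heads so they line up with the 32 query heads
// (equivalent to torch.repeat_interleave(k, 4, dim=2) at the PyTorch level;
// V uses the pre-RoPE %1147, since the rotation applies only to Q and K).
// After transposing all three to [4, 32, s, 128], %1263 invokes
// _scaled_dot_product_flash_attention_for_cpu with the [4, 1, ?, ?] mask
// %320, and the output is transposed and flattened back to [4, s, 4096]
// for the attn_output projection below.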
torch.bind_symbolic_shape %1266, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1218 = torch.constant.int -2 %int-1_1219 = torch.constant.int -1 %1267 = torch.aten.transpose.int %41, %int-2_1218, %int-1_1219 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_1220 = torch.constant.int 4 %1268 = torch.aten.mul.int %int4_1220, %1162 : !torch.int, !torch.int -> !torch.int %int4096_1221 = torch.constant.int 4096 %1269 = torch.prim.ListConstruct %1268, %int4096_1221 : (!torch.int, !torch.int) -> !torch.list<int> %1270 = torch.aten.view %1266, %1269 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1270, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1271 = torch.aten.mm %1270, %1267 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1271, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1222 = torch.constant.int 4 %int4096_1223 = torch.constant.int 4096 %1272 = torch.prim.ListConstruct %int4_1222, %1162, %int4096_1223 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1273 = torch.aten.view %1271, %1272 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1273, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_1224 = torch.constant.int 1 %1274 = torch.aten.add.Tensor %1111, %1273, %int1_1224 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1274, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_1225 = torch.constant.int 6 %1275 = torch.prims.convert_element_type %1274, %int6_1225 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1275, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_1226 = torch.constant.int 2 %1276 = torch.aten.pow.Tensor_Scalar %1275, %int2_1226 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1276, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_1227 = torch.constant.int -1 %1277 = torch.prim.ListConstruct %int-1_1227 : (!torch.int) -> !torch.list<int> %true_1228 = torch.constant.bool true %none_1229 = torch.constant.none %1278 = torch.aten.mean.dim %1276, %1277, %true_1228, %none_1229 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1278, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_1230 = torch.constant.float 9.9999997473787516E-6 %int1_1231 = torch.constant.int 1 %1279 = torch.aten.add.Scalar %1278, %float9.999990e-06_1230, %int1_1231 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1279, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1280 = torch.aten.rsqrt %1279 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1280, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1281 = torch.aten.mul.Tensor %1275, %1280 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> 
!torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1281, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1282 = torch.aten.mul.Tensor %42, %1281 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1282, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_1232 = torch.constant.int 5 %1283 = torch.prims.convert_element_type %1282, %int5_1232 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1283, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1233 = torch.constant.int -2 %int-1_1234 = torch.constant.int -1 %1284 = torch.aten.transpose.int %43, %int-2_1233, %int-1_1234 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_1235 = torch.constant.int 4 %1285 = torch.aten.mul.int %int4_1235, %294 : !torch.int, !torch.int -> !torch.int %int4096_1236 = torch.constant.int 4096 %1286 = torch.prim.ListConstruct %1285, %int4096_1236 : (!torch.int, !torch.int) -> !torch.list<int> %1287 = torch.aten.view %1283, %1286 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1287, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1288 = torch.aten.mm %1287, %1284 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1288, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_1237 = torch.constant.int 4 %int14336_1238 = torch.constant.int 14336 %1289 = torch.prim.ListConstruct %int4_1237, %294, %int14336_1238 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1290 = torch.aten.view %1288, %1289 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1290, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1291 = torch.aten.silu %1290 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1291, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_1239 = torch.constant.int -2 %int-1_1240 = torch.constant.int -1 %1292 = torch.aten.transpose.int %44, %int-2_1239, %int-1_1240 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_1241 = torch.constant.int 4 %1293 = torch.aten.mul.int %int4_1241, %294 : !torch.int, !torch.int -> !torch.int %int4096_1242 = torch.constant.int 4096 %1294 = torch.prim.ListConstruct %1293, %int4096_1242 : (!torch.int, !torch.int) -> !torch.list<int> %1295 = torch.aten.view %1283, %1294 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1295, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1296 = torch.aten.mm %1295, %1292 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1296, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_1243 = torch.constant.int 4 %int14336_1244 = torch.constant.int 14336 %1297 = torch.prim.ListConstruct %int4_1243, %294, %int14336_1244 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1298 = torch.aten.view %1296, %1297 : !torch.vtensor<[?,14336],f16>, 
!torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1298, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1299 = torch.aten.mul.Tensor %1291, %1298 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1299, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_1245 = torch.constant.int -2 %int-1_1246 = torch.constant.int -1 %1300 = torch.aten.transpose.int %45, %int-2_1245, %int-1_1246 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_1247 = torch.constant.int 1 %1301 = torch.aten.size.int %1290, %int1_1247 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_1248 = torch.constant.int 4 %1302 = torch.aten.mul.int %int4_1248, %1301 : !torch.int, !torch.int -> !torch.int %int14336_1249 = torch.constant.int 14336 %1303 = torch.prim.ListConstruct %1302, %int14336_1249 : (!torch.int, !torch.int) -> !torch.list<int> %1304 = torch.aten.view %1299, %1303 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1304, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %1305 = torch.aten.mm %1304, %1300 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1305, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1250 = torch.constant.int 4 %int4096_1251 = torch.constant.int 4096 %1306 = torch.prim.ListConstruct %int4_1250, %1301, %int4096_1251 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1307 = torch.aten.view %1305, %1306 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1307, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_1252 = torch.constant.int 1 %1308 = torch.aten.add.Tensor %1274, %1307, %int1_1252 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1308, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_1253 = torch.constant.int 6 %1309 = torch.prims.convert_element_type %1308, %int6_1253 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1309, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_1254 = torch.constant.int 2 %1310 = torch.aten.pow.Tensor_Scalar %1309, %int2_1254 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1310, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_1255 = torch.constant.int -1 %1311 = torch.prim.ListConstruct %int-1_1255 : (!torch.int) -> !torch.list<int> %true_1256 = torch.constant.bool true %none_1257 = torch.constant.none %1312 = torch.aten.mean.dim %1310, %1311, %true_1256, %none_1257 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1312, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_1258 = torch.constant.float 9.9999997473787516E-6 %int1_1259 = torch.constant.int 1 %1313 = torch.aten.add.Scalar %1312, %float9.999990e-06_1258, %int1_1259 : !torch.vtensor<[4,?,1],f32>, !torch.float, 
!torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1313, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1314 = torch.aten.rsqrt %1313 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1314, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1315 = torch.aten.mul.Tensor %1309, %1314 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1315, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1316 = torch.aten.mul.Tensor %46, %1315 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1316, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_1260 = torch.constant.int 5 %1317 = torch.prims.convert_element_type %1316, %int5_1260 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1317, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1261 = torch.constant.int -2 %int-1_1262 = torch.constant.int -1 %1318 = torch.aten.transpose.int %47, %int-2_1261, %int-1_1262 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_1263 = torch.constant.int 4 %1319 = torch.aten.mul.int %int4_1263, %294 : !torch.int, !torch.int -> !torch.int %int4096_1264 = torch.constant.int 4096 %1320 = torch.prim.ListConstruct %1319, %int4096_1264 : (!torch.int, !torch.int) -> !torch.list<int> %1321 = torch.aten.view %1317, %1320 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1321, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1322 = torch.aten.mm %1321, %1318 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1322, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1265 = torch.constant.int 4 %int4096_1266 = torch.constant.int 4096 %1323 = torch.prim.ListConstruct %int4_1265, %294, %int4096_1266 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1324 = torch.aten.view %1322, %1323 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1324, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1267 = torch.constant.int -2 %int-1_1268 = torch.constant.int -1 %1325 = torch.aten.transpose.int %48, %int-2_1267, %int-1_1268 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_1269 = torch.constant.int 4 %1326 = torch.aten.mul.int %int4_1269, %294 : !torch.int, !torch.int -> !torch.int %int4096_1270 = torch.constant.int 4096 %1327 = torch.prim.ListConstruct %1326, %int4096_1270 : (!torch.int, !torch.int) -> !torch.list<int> %1328 = torch.aten.view %1317, %1327 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1328, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1329 = torch.aten.mm %1328, %1325 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1329, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_1271 = torch.constant.int 4 %int1024_1272 = 
torch.constant.int 1024 %1330 = torch.prim.ListConstruct %int4_1271, %294, %int1024_1272 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1331 = torch.aten.view %1329, %1330 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1331, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_1273 = torch.constant.int -2 %int-1_1274 = torch.constant.int -1 %1332 = torch.aten.transpose.int %49, %int-2_1273, %int-1_1274 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_1275 = torch.constant.int 4 %1333 = torch.aten.mul.int %int4_1275, %294 : !torch.int, !torch.int -> !torch.int %int4096_1276 = torch.constant.int 4096 %1334 = torch.prim.ListConstruct %1333, %int4096_1276 : (!torch.int, !torch.int) -> !torch.list<int> %1335 = torch.aten.view %1317, %1334 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1335, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1336 = torch.aten.mm %1335, %1332 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1336, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_1277 = torch.constant.int 4 %int1024_1278 = torch.constant.int 1024 %1337 = torch.prim.ListConstruct %int4_1277, %294, %int1024_1278 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1338 = torch.aten.view %1336, %1337 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1338, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_1279 = torch.constant.int 4 %int32_1280 = torch.constant.int 32 %int128_1281 = torch.constant.int 128 %1339 = torch.prim.ListConstruct %int4_1279, %294, %int32_1280, %int128_1281 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1340 = torch.aten.view %1324, %1339 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1340, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_1282 = torch.constant.int 4 %int8_1283 = torch.constant.int 8 %int128_1284 = torch.constant.int 128 %1341 = torch.prim.ListConstruct %int4_1282, %294, %int8_1283, %int128_1284 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1342 = torch.aten.view %1331, %1341 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1342, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_1285 = torch.constant.int 4 %int8_1286 = torch.constant.int 8 %int128_1287 = torch.constant.int 128 %1343 = torch.prim.ListConstruct %int4_1285, %294, %int8_1286, %int128_1287 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1344 = torch.aten.view %1338, %1343 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1344, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_1288 = torch.constant.int 131072 %none_1289 = torch.constant.none %none_1290 = torch.constant.none %cpu_1291 = torch.constant.device "cpu" %false_1292 = torch.constant.bool false %1345 = torch.aten.arange %int131072_1288, %none_1289, %none_1290, %cpu_1291, %false_1292 : 
!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_1293 = torch.constant.int 0 %int128_1294 = torch.constant.int 128 %int2_1295 = torch.constant.int 2 %none_1296 = torch.constant.none %none_1297 = torch.constant.none %cpu_1298 = torch.constant.device "cpu" %false_1299 = torch.constant.bool false %1346 = torch.aten.arange.start_step %int0_1293, %int128_1294, %int2_1295, %none_1296, %none_1297, %cpu_1298, %false_1299 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_1300 = torch.constant.int 0 %int0_1301 = torch.constant.int 0 %int64_1302 = torch.constant.int 64 %int1_1303 = torch.constant.int 1 %1347 = torch.aten.slice.Tensor %1346, %int0_1300, %int0_1301, %int64_1302, %int1_1303 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_1304 = torch.constant.int 6 %1348 = torch.prims.convert_element_type %1347, %int6_1304 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_1305 = torch.constant.int 128 %1349 = torch.aten.div.Scalar %1348, %int128_1305 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_1306 = torch.constant.float 5.000000e+05 %1350 = torch.aten.pow.Scalar %float5.000000e05_1306, %1349 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1351 = torch.aten.reciprocal %1350 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_1307 = torch.constant.float 1.000000e+00 %1352 = torch.aten.mul.Scalar %1351, %float1.000000e00_1307 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_1308 = torch.constant.int 131072 %int1_1309 = torch.constant.int 1 %1353 = torch.prim.ListConstruct %int131072_1308, %int1_1309 : (!torch.int, !torch.int) -> !torch.list<int> %1354 = torch.aten.view %1345, %1353 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1355 = torch.aten.mul.Tensor %1354, %1352 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1356 = torch.aten.cos %1355 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1357 = torch.aten.sin %1355 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1358 = torch.aten.complex %1356, %1357 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_1310 = torch.constant.int 1 %1359 = torch.aten.size.int %1324, %int1_1310 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_1311 = torch.constant.int 0 %1360 = torch.aten.add.int %int0_1311, %1359 : !torch.int, !torch.int -> !torch.int %int0_1312 = torch.constant.int 0 %int0_1313 = torch.constant.int 0 %int1_1314 = torch.constant.int 1 %1361 = torch.aten.slice.Tensor %1358, %int0_1312, %int0_1313, %1360, %int1_1314 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1361, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_1315 = torch.constant.int 1 %int0_1316 = torch.constant.int 0 %int9223372036854775807_1317 = torch.constant.int 9223372036854775807 %int1_1318 = torch.constant.int 1 %1362 = torch.aten.slice.Tensor %1361, %int1_1315, %int0_1316, %int9223372036854775807_1317, %int1_1318 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> 
!torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1362, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_1319 = torch.constant.int 0 %1363 = torch.aten.unsqueeze %1362, %int0_1319 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1363, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_1320 = torch.constant.int 2 %1364 = torch.aten.unsqueeze %1363, %int2_1320 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1364, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_1321 = torch.constant.int 3 %int0_1322 = torch.constant.int 0 %int9223372036854775807_1323 = torch.constant.int 9223372036854775807 %int1_1324 = torch.constant.int 1 %1365 = torch.aten.slice.Tensor %1364, %int3_1321, %int0_1322, %int9223372036854775807_1323, %int1_1324 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1365, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1366 = torch_c.to_builtin_tensor %1340 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_1325 = arith.constant 1 : index %dim_1326 = tensor.dim %1366, %c1_1325 : tensor<4x?x32x128xf16> %1367 = flow.tensor.bitcast %1366 : tensor<4x?x32x128xf16>{%dim_1326} -> tensor<4x?x32x64xcomplex<f16>>{%dim_1326} %1368 = torch_c.from_builtin_tensor %1367 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %1368, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %1369 = torch.aten.mul.Tensor %1368, %1365 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %1369, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %1370 = torch_c.to_builtin_tensor %1369 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_1327 = arith.constant 1 : index %dim_1328 = tensor.dim %1370, %c1_1327 : tensor<4x?x32x64xcomplex<f32>> %1371 = flow.tensor.bitcast %1370 : tensor<4x?x32x64xcomplex<f32>>{%dim_1328} -> tensor<4x?x32x128xf32>{%dim_1328} %1372 = torch_c.from_builtin_tensor %1371 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %1372, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_1329 = torch.constant.int 5 %1373 = torch.prims.convert_element_type %1372, %int5_1329 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1373, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_1330 = torch.constant.int 131072 %none_1331 = torch.constant.none %none_1332 = torch.constant.none %cpu_1333 = torch.constant.device "cpu" %false_1334 = torch.constant.bool false %1374 = torch.aten.arange %int131072_1330, %none_1331, %none_1332, %cpu_1333, %false_1334 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_1335 = torch.constant.int 0 %int128_1336 = torch.constant.int 128 %int2_1337 = torch.constant.int 2 %none_1338 = torch.constant.none %none_1339 = torch.constant.none 
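// NOTE (annotation): the exporter re-emits the full 131072-position RoPE
// table before every rotary application (%1345 onward for this layer's Q,
// %1374 onward for its K) rather than hoisting a single copy; the
// computations are identical modulo SSA names, so a later CSE pass can be
// expected to deduplicate them.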
%cpu_1340 = torch.constant.device "cpu" %false_1341 = torch.constant.bool false %1375 = torch.aten.arange.start_step %int0_1335, %int128_1336, %int2_1337, %none_1338, %none_1339, %cpu_1340, %false_1341 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_1342 = torch.constant.int 0 %int0_1343 = torch.constant.int 0 %int64_1344 = torch.constant.int 64 %int1_1345 = torch.constant.int 1 %1376 = torch.aten.slice.Tensor %1375, %int0_1342, %int0_1343, %int64_1344, %int1_1345 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_1346 = torch.constant.int 6 %1377 = torch.prims.convert_element_type %1376, %int6_1346 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_1347 = torch.constant.int 128 %1378 = torch.aten.div.Scalar %1377, %int128_1347 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_1348 = torch.constant.float 5.000000e+05 %1379 = torch.aten.pow.Scalar %float5.000000e05_1348, %1378 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1380 = torch.aten.reciprocal %1379 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_1349 = torch.constant.float 1.000000e+00 %1381 = torch.aten.mul.Scalar %1380, %float1.000000e00_1349 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_1350 = torch.constant.int 131072 %int1_1351 = torch.constant.int 1 %1382 = torch.prim.ListConstruct %int131072_1350, %int1_1351 : (!torch.int, !torch.int) -> !torch.list<int> %1383 = torch.aten.view %1374, %1382 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1384 = torch.aten.mul.Tensor %1383, %1381 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1385 = torch.aten.cos %1384 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1386 = torch.aten.sin %1384 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1387 = torch.aten.complex %1385, %1386 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_1352 = torch.constant.int 1 %1388 = torch.aten.size.int %1331, %int1_1352 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_1353 = torch.constant.int 0 %1389 = torch.aten.add.int %int0_1353, %1388 : !torch.int, !torch.int -> !torch.int %int0_1354 = torch.constant.int 0 %int0_1355 = torch.constant.int 0 %int1_1356 = torch.constant.int 1 %1390 = torch.aten.slice.Tensor %1387, %int0_1354, %int0_1355, %1389, %int1_1356 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1390, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_1357 = torch.constant.int 1 %int0_1358 = torch.constant.int 0 %int9223372036854775807_1359 = torch.constant.int 9223372036854775807 %int1_1360 = torch.constant.int 1 %1391 = torch.aten.slice.Tensor %1390, %int1_1357, %int0_1358, %int9223372036854775807_1359, %int1_1360 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1391, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_1361 = torch.constant.int 0 %1392 = torch.aten.unsqueeze %1391, %int0_1361 : !torch.vtensor<[?,64],complex<f32>>, 
!torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1392, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_1362 = torch.constant.int 2 %1393 = torch.aten.unsqueeze %1392, %int2_1362 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1393, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_1363 = torch.constant.int 3 %int0_1364 = torch.constant.int 0 %int9223372036854775807_1365 = torch.constant.int 9223372036854775807 %int1_1366 = torch.constant.int 1 %1394 = torch.aten.slice.Tensor %1393, %int3_1363, %int0_1364, %int9223372036854775807_1365, %int1_1366 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1394, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1395 = torch_c.to_builtin_tensor %1342 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_1367 = arith.constant 1 : index %dim_1368 = tensor.dim %1395, %c1_1367 : tensor<4x?x8x128xf16> %1396 = flow.tensor.bitcast %1395 : tensor<4x?x8x128xf16>{%dim_1368} -> tensor<4x?x8x64xcomplex<f16>>{%dim_1368} %1397 = torch_c.from_builtin_tensor %1396 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %1397, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %1398 = torch.aten.mul.Tensor %1397, %1394 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %1398, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %1399 = torch_c.to_builtin_tensor %1398 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_1369 = arith.constant 1 : index %dim_1370 = tensor.dim %1399, %c1_1369 : tensor<4x?x8x64xcomplex<f32>> %1400 = flow.tensor.bitcast %1399 : tensor<4x?x8x64xcomplex<f32>>{%dim_1370} -> tensor<4x?x8x128xf32>{%dim_1370} %1401 = torch_c.from_builtin_tensor %1400 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %1401, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_1371 = torch.constant.int 5 %1402 = torch.prims.convert_element_type %1401, %int5_1371 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1402, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_1372 = torch.constant.int 64 %1403 = torch.aten.mul.Scalar %arg2, %int64_1372 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1403, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int10 = torch.constant.int 10 %int1_1373 = torch.constant.int 1 %1404 = torch.aten.add.Scalar %1403, %int10, %int1_1373 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1404, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_1374 = torch.constant.int 4 %int32_1375 = torch.constant.int 32 %int8_1376 = torch.constant.int 8 %int128_1377 = torch.constant.int 128 %1405 = torch.prim.ListConstruct %int4_1374, %425, %int32_1375, %int8_1376, %int128_1377 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) 
-> !torch.list<int> %1406 = torch.aten.view %1402, %1405 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1406, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_1378 = torch.constant.int 4 %1407 = torch.aten.mul.int %int4_1378, %425 : !torch.int, !torch.int -> !torch.int %int32_1379 = torch.constant.int 32 %int8_1380 = torch.constant.int 8 %int128_1381 = torch.constant.int 128 %1408 = torch.prim.ListConstruct %1407, %int32_1379, %int8_1380, %int128_1381 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1409 = torch.aten.view %1406, %1408 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1409, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_1382 = torch.constant.int 4 %1410 = torch.aten.mul.int %int4_1382, %425 : !torch.int, !torch.int -> !torch.int %1411 = torch.prim.ListConstruct %1410 : (!torch.int) -> !torch.list<int> %1412 = torch.aten.view %1404, %1411 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %1412, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_1383 = torch.constant.int 32 %int2_1384 = torch.constant.int 2 %int32_1385 = torch.constant.int 32 %int8_1386 = torch.constant.int 8 %int128_1387 = torch.constant.int 128 %1413 = torch.prim.ListConstruct %416, %int32_1383, %int2_1384, %int32_1385, %int8_1386, %int128_1387 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1414 = torch.aten.view %1246, %1413 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1414, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_1388 = torch.constant.int 32 %1415 = torch.aten.mul.int %416, %int32_1388 : !torch.int, !torch.int -> !torch.int %int2_1389 = torch.constant.int 2 %1416 = torch.aten.mul.int %1415, %int2_1389 : !torch.int, !torch.int -> !torch.int %int32_1390 = torch.constant.int 32 %int8_1391 = torch.constant.int 8 %int128_1392 = torch.constant.int 128 %1417 = torch.prim.ListConstruct %1416, %int32_1390, %int8_1391, %int128_1392 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1418 = torch.aten.view %1414, %1417 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1418, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %1419 = torch.prim.ListConstruct %1412 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_1393 = torch.constant.bool false %1420 = torch.aten.index_put %1418, %1419, %1409, %false_1393 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1420, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_1394 = torch.constant.int 32 %int2_1395 = torch.constant.int 2 %int32_1396 = torch.constant.int 32 %int8_1397 = torch.constant.int 8 %int128_1398 = torch.constant.int 128 %1421 = torch.prim.ListConstruct %416, %int32_1394, %int2_1395, %int32_1396, %int8_1397, %int128_1398 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1422 = torch.aten.view %1420, %1421 
: !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1422, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_1399 = torch.constant.int 2097152 %1423 = torch.prim.ListConstruct %416, %int2097152_1399 : (!torch.int, !torch.int) -> !torch.list<int> %1424 = torch.aten.view %1422, %1423 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1424, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_1400 = torch.constant.int 32 %int2_1401 = torch.constant.int 2 %int32_1402 = torch.constant.int 32 %int8_1403 = torch.constant.int 8 %int128_1404 = torch.constant.int 128 %1425 = torch.prim.ListConstruct %416, %int32_1400, %int2_1401, %int32_1402, %int8_1403, %int128_1404 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1426 = torch.aten.view %1424, %1425 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1426, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_1405 = torch.constant.int 32 %int8_1406 = torch.constant.int 8 %int128_1407 = torch.constant.int 128 %1427 = torch.prim.ListConstruct %1416, %int32_1405, %int8_1406, %int128_1407 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1428 = torch.aten.view %1426, %1427 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1428, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_1408 = torch.constant.int 4 %int32_1409 = torch.constant.int 32 %int8_1410 = torch.constant.int 8 %int128_1411 = torch.constant.int 128 %1429 = torch.prim.ListConstruct %int4_1408, %425, %int32_1409, %int8_1410, %int128_1411 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1430 = torch.aten.view %1344, %1429 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1430, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_1412 = torch.constant.int 4 %1431 = torch.aten.mul.int %int4_1412, %425 : !torch.int, !torch.int -> !torch.int %int32_1413 = torch.constant.int 32 %int8_1414 = torch.constant.int 8 %int128_1415 = torch.constant.int 128 %1432 = torch.prim.ListConstruct %1431, %int32_1413, %int8_1414, %int128_1415 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1433 = torch.aten.view %1430, %1432 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1433, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_1416 = torch.constant.int 1 %int1_1417 = torch.constant.int 1 %1434 = torch.aten.add.Scalar %1404, %int1_1416, %int1_1417 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1434, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_1418 = torch.constant.int 4 %1435 = torch.aten.mul.int %int4_1418, %425 : !torch.int, !torch.int -> !torch.int %1436 = torch.prim.ListConstruct %1435 : (!torch.int) -> !torch.list<int> %1437 = torch.aten.view %1434, %1436 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> 
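// Above: the rotated keys were scattered into the shared paged KV cache.
// %1246 ([pages, 2097152] f16) is viewed as [pages, 32, 2, 32, 8, 128] and
// flattened to [pages*64, 32, 8, 128]; index_put writes each 32-token key
// block at slot %arg2 * 64 + 10, consistent with interleaved K/V pairs for
// 32 layers (slot 2*l for K, 2*l + 1 for V, here layer l = 5).
// Below: the value tensor (%1433) is written the same way at the adjacent
// slot (%1434 = slot + 1), and the cache is viewed back to its flat
// [pages, 2097152] form (%1443).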
torch.bind_symbolic_shape %1437, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %1438 = torch.prim.ListConstruct %1437 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_1419 = torch.constant.bool false %1439 = torch.aten.index_put %1428, %1438, %1433, %false_1419 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1439, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_1420 = torch.constant.int 32 %int2_1421 = torch.constant.int 2 %int32_1422 = torch.constant.int 32 %int8_1423 = torch.constant.int 8 %int128_1424 = torch.constant.int 128 %1440 = torch.prim.ListConstruct %416, %int32_1420, %int2_1421, %int32_1422, %int8_1423, %int128_1424 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1441 = torch.aten.view %1439, %1440 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1441, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_1425 = torch.constant.int 2097152 %1442 = torch.prim.ListConstruct %416, %int2097152_1425 : (!torch.int, !torch.int) -> !torch.list<int> %1443 = torch.aten.view %1441, %1442 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1443, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_1426 = torch.constant.int -2 %1444 = torch.aten.unsqueeze %1402, %int-2_1426 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1444, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_1427 = torch.constant.int 4 %int8_1428 = torch.constant.int 8 %int4_1429 = torch.constant.int 4 %int128_1430 = torch.constant.int 128 %1445 = torch.prim.ListConstruct %int4_1427, %1388, %int8_1428, %int4_1429, %int128_1430 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_1431 = torch.constant.bool false %1446 = torch.aten.expand %1444, %1445, %false_1431 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1446, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_1432 = torch.constant.int 0 %1447 = torch.aten.clone %1446, %int0_1432 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1447, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_1433 = torch.constant.int 4 %int32_1434 = torch.constant.int 32 %int128_1435 = torch.constant.int 128 %1448 = torch.prim.ListConstruct %int4_1433, %1388, %int32_1434, %int128_1435 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1449 = torch.aten._unsafe_view %1447, %1448 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1449, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_1436 = torch.constant.int -2 %1450 = torch.aten.unsqueeze %1344, %int-2_1436 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1450, [%292], affine_map<()[s0] -> (4, s0 * 
32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_1437 = torch.constant.int 1 %1451 = torch.aten.size.int %1338, %int1_1437 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_1438 = torch.constant.int 4 %int8_1439 = torch.constant.int 8 %int4_1440 = torch.constant.int 4 %int128_1441 = torch.constant.int 128 %1452 = torch.prim.ListConstruct %int4_1438, %1451, %int8_1439, %int4_1440, %int128_1441 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_1442 = torch.constant.bool false %1453 = torch.aten.expand %1450, %1452, %false_1442 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1453, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_1443 = torch.constant.int 0 %1454 = torch.aten.clone %1453, %int0_1443 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1454, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_1444 = torch.constant.int 4 %int32_1445 = torch.constant.int 32 %int128_1446 = torch.constant.int 128 %1455 = torch.prim.ListConstruct %int4_1444, %1451, %int32_1445, %int128_1446 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1456 = torch.aten._unsafe_view %1454, %1455 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1456, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_1447 = torch.constant.int 1 %int2_1448 = torch.constant.int 2 %1457 = torch.aten.transpose.int %1373, %int1_1447, %int2_1448 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1457, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1449 = torch.constant.int 1 %int2_1450 = torch.constant.int 2 %1458 = torch.aten.transpose.int %1449, %int1_1449, %int2_1450 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1458, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1451 = torch.constant.int 1 %int2_1452 = torch.constant.int 2 %1459 = torch.aten.transpose.int %1456, %int1_1451, %int2_1452 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1459, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_1453 = torch.constant.float 0.000000e+00 %false_1454 = torch.constant.bool false %none_1455 = torch.constant.none %1460:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%1457, %1458, %1459, %float0.000000e00_1453, %false_1454, %320, %none_1455) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %1460#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1456 = torch.constant.int 1 %int2_1457 = torch.constant.int 2 %1461 = torch.aten.transpose.int %1460#0, %int1_1456, %int2_1457 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> 
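// Above: grouped-query attention for this layer. The 8 KV heads are
// broadcast 4x (unsqueeze + expand + clone + _unsafe_view) to match the 32
// query heads, all three tensors are transposed to [4, 32, seq, 128], and
// flash attention runs with the shared mask %320 ([4, 1, seq, seq] f16).
// Below: the attention output is transposed back to [4, seq, 32, 128],
// flattened to [4, seq, 4096], multiplied by the transposed output
// projection, and added to the residual stream (%1471); then RMSNorm
// (mean of squares + eps ~ 1e-5, rsqrt, scale) feeds the SwiGLU MLP:
// silu(x @ W_gate^T) * (x @ W_up^T) @ W_down^T, with a second residual add.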
torch.bind_symbolic_shape %1461, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_1458 = torch.constant.int 4 %int4096_1459 = torch.constant.int 4096 %1462 = torch.prim.ListConstruct %int4_1458, %1359, %int4096_1459 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1463 = torch.aten.view %1461, %1462 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1463, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1460 = torch.constant.int -2 %int-1_1461 = torch.constant.int -1 %1464 = torch.aten.transpose.int %50, %int-2_1460, %int-1_1461 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_1462 = torch.constant.int 4 %1465 = torch.aten.mul.int %int4_1462, %1359 : !torch.int, !torch.int -> !torch.int %int4096_1463 = torch.constant.int 4096 %1466 = torch.prim.ListConstruct %1465, %int4096_1463 : (!torch.int, !torch.int) -> !torch.list<int> %1467 = torch.aten.view %1463, %1466 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1467, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1468 = torch.aten.mm %1467, %1464 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1468, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1464 = torch.constant.int 4 %int4096_1465 = torch.constant.int 4096 %1469 = torch.prim.ListConstruct %int4_1464, %1359, %int4096_1465 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1470 = torch.aten.view %1468, %1469 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1470, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_1466 = torch.constant.int 1 %1471 = torch.aten.add.Tensor %1308, %1470, %int1_1466 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1471, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_1467 = torch.constant.int 6 %1472 = torch.prims.convert_element_type %1471, %int6_1467 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1472, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_1468 = torch.constant.int 2 %1473 = torch.aten.pow.Tensor_Scalar %1472, %int2_1468 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1473, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_1469 = torch.constant.int -1 %1474 = torch.prim.ListConstruct %int-1_1469 : (!torch.int) -> !torch.list<int> %true_1470 = torch.constant.bool true %none_1471 = torch.constant.none %1475 = torch.aten.mean.dim %1473, %1474, %true_1470, %none_1471 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1475, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_1472 = torch.constant.float 9.9999997473787516E-6 %int1_1473 = torch.constant.int 1 %1476 = torch.aten.add.Scalar %1475, %float9.999990e-06_1472, %int1_1473 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> 
!torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1476, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1477 = torch.aten.rsqrt %1476 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1477, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1478 = torch.aten.mul.Tensor %1472, %1477 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1478, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1479 = torch.aten.mul.Tensor %51, %1478 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1479, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_1474 = torch.constant.int 5 %1480 = torch.prims.convert_element_type %1479, %int5_1474 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1480, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1475 = torch.constant.int -2 %int-1_1476 = torch.constant.int -1 %1481 = torch.aten.transpose.int %52, %int-2_1475, %int-1_1476 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_1477 = torch.constant.int 4 %1482 = torch.aten.mul.int %int4_1477, %294 : !torch.int, !torch.int -> !torch.int %int4096_1478 = torch.constant.int 4096 %1483 = torch.prim.ListConstruct %1482, %int4096_1478 : (!torch.int, !torch.int) -> !torch.list<int> %1484 = torch.aten.view %1480, %1483 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1484, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1485 = torch.aten.mm %1484, %1481 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1485, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_1479 = torch.constant.int 4 %int14336_1480 = torch.constant.int 14336 %1486 = torch.prim.ListConstruct %int4_1479, %294, %int14336_1480 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1487 = torch.aten.view %1485, %1486 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1487, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1488 = torch.aten.silu %1487 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1488, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_1481 = torch.constant.int -2 %int-1_1482 = torch.constant.int -1 %1489 = torch.aten.transpose.int %53, %int-2_1481, %int-1_1482 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_1483 = torch.constant.int 4 %1490 = torch.aten.mul.int %int4_1483, %294 : !torch.int, !torch.int -> !torch.int %int4096_1484 = torch.constant.int 4096 %1491 = torch.prim.ListConstruct %1490, %int4096_1484 : (!torch.int, !torch.int) -> !torch.list<int> %1492 = torch.aten.view %1480, %1491 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1492, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1493 = torch.aten.mm %1492, %1489 : !torch.vtensor<[?,4096],f16>, 
!torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1493, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_1485 = torch.constant.int 4 %int14336_1486 = torch.constant.int 14336 %1494 = torch.prim.ListConstruct %int4_1485, %294, %int14336_1486 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1495 = torch.aten.view %1493, %1494 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1495, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1496 = torch.aten.mul.Tensor %1488, %1495 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1496, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_1487 = torch.constant.int -2 %int-1_1488 = torch.constant.int -1 %1497 = torch.aten.transpose.int %54, %int-2_1487, %int-1_1488 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_1489 = torch.constant.int 1 %1498 = torch.aten.size.int %1487, %int1_1489 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_1490 = torch.constant.int 4 %1499 = torch.aten.mul.int %int4_1490, %1498 : !torch.int, !torch.int -> !torch.int %int14336_1491 = torch.constant.int 14336 %1500 = torch.prim.ListConstruct %1499, %int14336_1491 : (!torch.int, !torch.int) -> !torch.list<int> %1501 = torch.aten.view %1496, %1500 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1501, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %1502 = torch.aten.mm %1501, %1497 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1502, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1492 = torch.constant.int 4 %int4096_1493 = torch.constant.int 4096 %1503 = torch.prim.ListConstruct %int4_1492, %1498, %int4096_1493 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1504 = torch.aten.view %1502, %1503 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1504, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_1494 = torch.constant.int 1 %1505 = torch.aten.add.Tensor %1471, %1504, %int1_1494 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1505, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_1495 = torch.constant.int 6 %1506 = torch.prims.convert_element_type %1505, %int6_1495 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1506, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_1496 = torch.constant.int 2 %1507 = torch.aten.pow.Tensor_Scalar %1506, %int2_1496 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1507, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_1497 = torch.constant.int -1 %1508 = torch.prim.ListConstruct %int-1_1497 : (!torch.int) -> !torch.list<int> %true_1498 = torch.constant.bool true %none_1499 = torch.constant.none %1509 = torch.aten.mean.dim %1507, %1508, %true_1498, 
%none_1499 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1509, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_1500 = torch.constant.float 9.9999997473787516E-6 %int1_1501 = torch.constant.int 1 %1510 = torch.aten.add.Scalar %1509, %float9.999990e-06_1500, %int1_1501 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1510, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1511 = torch.aten.rsqrt %1510 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1511, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1512 = torch.aten.mul.Tensor %1506, %1511 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1512, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1513 = torch.aten.mul.Tensor %55, %1512 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1513, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_1502 = torch.constant.int 5 %1514 = torch.prims.convert_element_type %1513, %int5_1502 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1514, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1503 = torch.constant.int -2 %int-1_1504 = torch.constant.int -1 %1515 = torch.aten.transpose.int %56, %int-2_1503, %int-1_1504 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_1505 = torch.constant.int 4 %1516 = torch.aten.mul.int %int4_1505, %294 : !torch.int, !torch.int -> !torch.int %int4096_1506 = torch.constant.int 4096 %1517 = torch.prim.ListConstruct %1516, %int4096_1506 : (!torch.int, !torch.int) -> !torch.list<int> %1518 = torch.aten.view %1514, %1517 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1518, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1519 = torch.aten.mm %1518, %1515 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1519, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1507 = torch.constant.int 4 %int4096_1508 = torch.constant.int 4096 %1520 = torch.prim.ListConstruct %int4_1507, %294, %int4096_1508 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1521 = torch.aten.view %1519, %1520 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1521, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1509 = torch.constant.int -2 %int-1_1510 = torch.constant.int -1 %1522 = torch.aten.transpose.int %57, %int-2_1509, %int-1_1510 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_1511 = torch.constant.int 4 %1523 = torch.aten.mul.int %int4_1511, %294 : !torch.int, !torch.int -> !torch.int %int4096_1512 = torch.constant.int 4096 %1524 = torch.prim.ListConstruct %1523, %int4096_1512 : (!torch.int, !torch.int) -> !torch.list<int> %1525 = torch.aten.view %1514, %1524 : !torch.vtensor<[4,?,4096],f16>, 
!torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1525, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1526 = torch.aten.mm %1525, %1522 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1526, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_1513 = torch.constant.int 4 %int1024_1514 = torch.constant.int 1024 %1527 = torch.prim.ListConstruct %int4_1513, %294, %int1024_1514 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1528 = torch.aten.view %1526, %1527 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1528, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_1515 = torch.constant.int -2 %int-1_1516 = torch.constant.int -1 %1529 = torch.aten.transpose.int %58, %int-2_1515, %int-1_1516 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_1517 = torch.constant.int 4 %1530 = torch.aten.mul.int %int4_1517, %294 : !torch.int, !torch.int -> !torch.int %int4096_1518 = torch.constant.int 4096 %1531 = torch.prim.ListConstruct %1530, %int4096_1518 : (!torch.int, !torch.int) -> !torch.list<int> %1532 = torch.aten.view %1514, %1531 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1532, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1533 = torch.aten.mm %1532, %1529 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1533, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_1519 = torch.constant.int 4 %int1024_1520 = torch.constant.int 1024 %1534 = torch.prim.ListConstruct %int4_1519, %294, %int1024_1520 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1535 = torch.aten.view %1533, %1534 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1535, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_1521 = torch.constant.int 4 %int32_1522 = torch.constant.int 32 %int128_1523 = torch.constant.int 128 %1536 = torch.prim.ListConstruct %int4_1521, %294, %int32_1522, %int128_1523 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1537 = torch.aten.view %1521, %1536 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1537, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_1524 = torch.constant.int 4 %int8_1525 = torch.constant.int 8 %int128_1526 = torch.constant.int 128 %1538 = torch.prim.ListConstruct %int4_1524, %294, %int8_1525, %int128_1526 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1539 = torch.aten.view %1528, %1538 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1539, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_1527 = torch.constant.int 4 %int8_1528 = torch.constant.int 8 %int128_1529 = torch.constant.int 128 %1540 = torch.prim.ListConstruct %int4_1527, %294, %int8_1528, %int128_1529 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1541 = torch.aten.view %1535, %1540 : !torch.vtensor<[4,?,1024],f16>, 
!torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1541, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_1530 = torch.constant.int 131072 %none_1531 = torch.constant.none %none_1532 = torch.constant.none %cpu_1533 = torch.constant.device "cpu" %false_1534 = torch.constant.bool false %1542 = torch.aten.arange %int131072_1530, %none_1531, %none_1532, %cpu_1533, %false_1534 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_1535 = torch.constant.int 0 %int128_1536 = torch.constant.int 128 %int2_1537 = torch.constant.int 2 %none_1538 = torch.constant.none %none_1539 = torch.constant.none %cpu_1540 = torch.constant.device "cpu" %false_1541 = torch.constant.bool false %1543 = torch.aten.arange.start_step %int0_1535, %int128_1536, %int2_1537, %none_1538, %none_1539, %cpu_1540, %false_1541 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_1542 = torch.constant.int 0 %int0_1543 = torch.constant.int 0 %int64_1544 = torch.constant.int 64 %int1_1545 = torch.constant.int 1 %1544 = torch.aten.slice.Tensor %1543, %int0_1542, %int0_1543, %int64_1544, %int1_1545 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_1546 = torch.constant.int 6 %1545 = torch.prims.convert_element_type %1544, %int6_1546 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_1547 = torch.constant.int 128 %1546 = torch.aten.div.Scalar %1545, %int128_1547 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_1548 = torch.constant.float 5.000000e+05 %1547 = torch.aten.pow.Scalar %float5.000000e05_1548, %1546 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1548 = torch.aten.reciprocal %1547 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_1549 = torch.constant.float 1.000000e+00 %1549 = torch.aten.mul.Scalar %1548, %float1.000000e00_1549 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_1550 = torch.constant.int 131072 %int1_1551 = torch.constant.int 1 %1550 = torch.prim.ListConstruct %int131072_1550, %int1_1551 : (!torch.int, !torch.int) -> !torch.list<int> %1551 = torch.aten.view %1542, %1550 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1552 = torch.aten.mul.Tensor %1551, %1549 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1553 = torch.aten.cos %1552 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1554 = torch.aten.sin %1552 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1555 = torch.aten.complex %1553, %1554 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_1552 = torch.constant.int 1 %1556 = torch.aten.size.int %1521, %int1_1552 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_1553 = torch.constant.int 0 %1557 = torch.aten.add.int %int0_1553, %1556 : !torch.int, !torch.int -> !torch.int %int0_1554 = torch.constant.int 0 %int0_1555 = torch.constant.int 0 %int1_1556 = torch.constant.int 1 %1558 = torch.aten.slice.Tensor %1555, %int0_1554, %int0_1555, %1557, %int1_1556 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1558, 
[%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_1557 = torch.constant.int 1 %int0_1558 = torch.constant.int 0 %int9223372036854775807_1559 = torch.constant.int 9223372036854775807 %int1_1560 = torch.constant.int 1 %1559 = torch.aten.slice.Tensor %1558, %int1_1557, %int0_1558, %int9223372036854775807_1559, %int1_1560 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1559, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_1561 = torch.constant.int 0 %1560 = torch.aten.unsqueeze %1559, %int0_1561 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1560, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_1562 = torch.constant.int 2 %1561 = torch.aten.unsqueeze %1560, %int2_1562 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1561, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_1563 = torch.constant.int 3 %int0_1564 = torch.constant.int 0 %int9223372036854775807_1565 = torch.constant.int 9223372036854775807 %int1_1566 = torch.constant.int 1 %1562 = torch.aten.slice.Tensor %1561, %int3_1563, %int0_1564, %int9223372036854775807_1565, %int1_1566 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1562, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1563 = torch_c.to_builtin_tensor %1537 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_1567 = arith.constant 1 : index %dim_1568 = tensor.dim %1563, %c1_1567 : tensor<4x?x32x128xf16> %1564 = flow.tensor.bitcast %1563 : tensor<4x?x32x128xf16>{%dim_1568} -> tensor<4x?x32x64xcomplex<f16>>{%dim_1568} %1565 = torch_c.from_builtin_tensor %1564 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %1565, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %1566 = torch.aten.mul.Tensor %1565, %1562 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %1566, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %1567 = torch_c.to_builtin_tensor %1566 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_1569 = arith.constant 1 : index %dim_1570 = tensor.dim %1567, %c1_1569 : tensor<4x?x32x64xcomplex<f32>> %1568 = flow.tensor.bitcast %1567 : tensor<4x?x32x64xcomplex<f32>>{%dim_1570} -> tensor<4x?x32x128xf32>{%dim_1570} %1569 = torch_c.from_builtin_tensor %1568 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %1569, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_1571 = torch.constant.int 5 %1570 = torch.prims.convert_element_type %1569, %int5_1571 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1570, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_1572 = torch.constant.int 131072 %none_1573 = torch.constant.none %none_1574 = torch.constant.none 
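// Above: the next layer's Q/K/V projections (%56, %57, %58) and its
// query-side RoPE, identical in structure to the block ending at %1373.
// Below: the full 131072-entry rotary table is recomputed once more for the
// key path; each attention layer re-derives cos/sin from scratch rather
// than reusing the table built earlier in the function.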
%cpu_1575 = torch.constant.device "cpu" %false_1576 = torch.constant.bool false %1571 = torch.aten.arange %int131072_1572, %none_1573, %none_1574, %cpu_1575, %false_1576 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_1577 = torch.constant.int 0 %int128_1578 = torch.constant.int 128 %int2_1579 = torch.constant.int 2 %none_1580 = torch.constant.none %none_1581 = torch.constant.none %cpu_1582 = torch.constant.device "cpu" %false_1583 = torch.constant.bool false %1572 = torch.aten.arange.start_step %int0_1577, %int128_1578, %int2_1579, %none_1580, %none_1581, %cpu_1582, %false_1583 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_1584 = torch.constant.int 0 %int0_1585 = torch.constant.int 0 %int64_1586 = torch.constant.int 64 %int1_1587 = torch.constant.int 1 %1573 = torch.aten.slice.Tensor %1572, %int0_1584, %int0_1585, %int64_1586, %int1_1587 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_1588 = torch.constant.int 6 %1574 = torch.prims.convert_element_type %1573, %int6_1588 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_1589 = torch.constant.int 128 %1575 = torch.aten.div.Scalar %1574, %int128_1589 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_1590 = torch.constant.float 5.000000e+05 %1576 = torch.aten.pow.Scalar %float5.000000e05_1590, %1575 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1577 = torch.aten.reciprocal %1576 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_1591 = torch.constant.float 1.000000e+00 %1578 = torch.aten.mul.Scalar %1577, %float1.000000e00_1591 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_1592 = torch.constant.int 131072 %int1_1593 = torch.constant.int 1 %1579 = torch.prim.ListConstruct %int131072_1592, %int1_1593 : (!torch.int, !torch.int) -> !torch.list<int> %1580 = torch.aten.view %1571, %1579 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1581 = torch.aten.mul.Tensor %1580, %1578 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1582 = torch.aten.cos %1581 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1583 = torch.aten.sin %1581 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1584 = torch.aten.complex %1582, %1583 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_1594 = torch.constant.int 1 %1585 = torch.aten.size.int %1528, %int1_1594 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_1595 = torch.constant.int 0 %1586 = torch.aten.add.int %int0_1595, %1585 : !torch.int, !torch.int -> !torch.int %int0_1596 = torch.constant.int 0 %int0_1597 = torch.constant.int 0 %int1_1598 = torch.constant.int 1 %1587 = torch.aten.slice.Tensor %1584, %int0_1596, %int0_1597, %1586, %int1_1598 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1587, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_1599 = torch.constant.int 1 %int0_1600 = torch.constant.int 0 %int9223372036854775807_1601 = torch.constant.int 9223372036854775807 %int1_1602 = torch.constant.int 1 %1588 = 
torch.aten.slice.Tensor %1587, %int1_1599, %int0_1600, %int9223372036854775807_1601, %int1_1602 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1588, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_1603 = torch.constant.int 0 %1589 = torch.aten.unsqueeze %1588, %int0_1603 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1589, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_1604 = torch.constant.int 2 %1590 = torch.aten.unsqueeze %1589, %int2_1604 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1590, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_1605 = torch.constant.int 3 %int0_1606 = torch.constant.int 0 %int9223372036854775807_1607 = torch.constant.int 9223372036854775807 %int1_1608 = torch.constant.int 1 %1591 = torch.aten.slice.Tensor %1590, %int3_1605, %int0_1606, %int9223372036854775807_1607, %int1_1608 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1591, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1592 = torch_c.to_builtin_tensor %1539 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_1609 = arith.constant 1 : index %dim_1610 = tensor.dim %1592, %c1_1609 : tensor<4x?x8x128xf16> %1593 = flow.tensor.bitcast %1592 : tensor<4x?x8x128xf16>{%dim_1610} -> tensor<4x?x8x64xcomplex<f16>>{%dim_1610} %1594 = torch_c.from_builtin_tensor %1593 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %1594, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %1595 = torch.aten.mul.Tensor %1594, %1591 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %1595, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %1596 = torch_c.to_builtin_tensor %1595 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_1611 = arith.constant 1 : index %dim_1612 = tensor.dim %1596, %c1_1611 : tensor<4x?x8x64xcomplex<f32>> %1597 = flow.tensor.bitcast %1596 : tensor<4x?x8x64xcomplex<f32>>{%dim_1612} -> tensor<4x?x8x128xf32>{%dim_1612} %1598 = torch_c.from_builtin_tensor %1597 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %1598, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_1613 = torch.constant.int 5 %1599 = torch.prims.convert_element_type %1598, %int5_1613 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1599, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_1614 = torch.constant.int 64 %1600 = torch.aten.mul.Scalar %arg2, %int64_1614 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1600, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int12 = torch.constant.int 12 %int1_1615 = torch.constant.int 1 %1601 = torch.aten.add.Scalar %1600, %int12, %int1_1615 : 
!torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1601, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_1616 = torch.constant.int 4 %int32_1617 = torch.constant.int 32 %int8_1618 = torch.constant.int 8 %int128_1619 = torch.constant.int 128 %1602 = torch.prim.ListConstruct %int4_1616, %425, %int32_1617, %int8_1618, %int128_1619 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1603 = torch.aten.view %1599, %1602 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1603, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_1620 = torch.constant.int 4 %1604 = torch.aten.mul.int %int4_1620, %425 : !torch.int, !torch.int -> !torch.int %int32_1621 = torch.constant.int 32 %int8_1622 = torch.constant.int 8 %int128_1623 = torch.constant.int 128 %1605 = torch.prim.ListConstruct %1604, %int32_1621, %int8_1622, %int128_1623 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1606 = torch.aten.view %1603, %1605 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1606, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_1624 = torch.constant.int 4 %1607 = torch.aten.mul.int %int4_1624, %425 : !torch.int, !torch.int -> !torch.int %1608 = torch.prim.ListConstruct %1607 : (!torch.int) -> !torch.list<int> %1609 = torch.aten.view %1601, %1608 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %1609, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_1625 = torch.constant.int 32 %int2_1626 = torch.constant.int 2 %int32_1627 = torch.constant.int 32 %int8_1628 = torch.constant.int 8 %int128_1629 = torch.constant.int 128 %1610 = torch.prim.ListConstruct %416, %int32_1625, %int2_1626, %int32_1627, %int8_1628, %int128_1629 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1611 = torch.aten.view %1443, %1610 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1611, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_1630 = torch.constant.int 32 %1612 = torch.aten.mul.int %416, %int32_1630 : !torch.int, !torch.int -> !torch.int %int2_1631 = torch.constant.int 2 %1613 = torch.aten.mul.int %1612, %int2_1631 : !torch.int, !torch.int -> !torch.int %int32_1632 = torch.constant.int 32 %int8_1633 = torch.constant.int 8 %int128_1634 = torch.constant.int 128 %1614 = torch.prim.ListConstruct %1613, %int32_1632, %int8_1633, %int128_1634 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1615 = torch.aten.view %1611, %1614 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1615, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %1616 = torch.prim.ListConstruct %1609 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_1635 = torch.constant.bool false %1617 = torch.aten.index_put %1615, %1616, %1606, %false_1635 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1617, [%293], 
affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_1636 = torch.constant.int 32 %int2_1637 = torch.constant.int 2 %int32_1638 = torch.constant.int 32 %int8_1639 = torch.constant.int 8 %int128_1640 = torch.constant.int 128 %1618 = torch.prim.ListConstruct %416, %int32_1636, %int2_1637, %int32_1638, %int8_1639, %int128_1640 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1619 = torch.aten.view %1617, %1618 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1619, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_1641 = torch.constant.int 2097152 %1620 = torch.prim.ListConstruct %416, %int2097152_1641 : (!torch.int, !torch.int) -> !torch.list<int> %1621 = torch.aten.view %1619, %1620 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1621, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_1642 = torch.constant.int 32 %int2_1643 = torch.constant.int 2 %int32_1644 = torch.constant.int 32 %int8_1645 = torch.constant.int 8 %int128_1646 = torch.constant.int 128 %1622 = torch.prim.ListConstruct %416, %int32_1642, %int2_1643, %int32_1644, %int8_1645, %int128_1646 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1623 = torch.aten.view %1621, %1622 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1623, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_1647 = torch.constant.int 32 %int8_1648 = torch.constant.int 8 %int128_1649 = torch.constant.int 128 %1624 = torch.prim.ListConstruct %1613, %int32_1647, %int8_1648, %int128_1649 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1625 = torch.aten.view %1623, %1624 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1625, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_1650 = torch.constant.int 4 %int32_1651 = torch.constant.int 32 %int8_1652 = torch.constant.int 8 %int128_1653 = torch.constant.int 128 %1626 = torch.prim.ListConstruct %int4_1650, %425, %int32_1651, %int8_1652, %int128_1653 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1627 = torch.aten.view %1541, %1626 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1627, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_1654 = torch.constant.int 4 %1628 = torch.aten.mul.int %int4_1654, %425 : !torch.int, !torch.int -> !torch.int %int32_1655 = torch.constant.int 32 %int8_1656 = torch.constant.int 8 %int128_1657 = torch.constant.int 128 %1629 = torch.prim.ListConstruct %1628, %int32_1655, %int8_1656, %int128_1657 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1630 = torch.aten.view %1627, %1629 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1630, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_1658 = torch.constant.int 1 %int1_1659 = torch.constant.int 1 %1631 = torch.aten.add.Scalar %1601, %int1_1658, %int1_1659 : 
!torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1631, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_1660 = torch.constant.int 4 %1632 = torch.aten.mul.int %int4_1660, %425 : !torch.int, !torch.int -> !torch.int %1633 = torch.prim.ListConstruct %1632 : (!torch.int) -> !torch.list<int> %1634 = torch.aten.view %1631, %1633 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %1634, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %1635 = torch.prim.ListConstruct %1634 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_1661 = torch.constant.bool false %1636 = torch.aten.index_put %1625, %1635, %1630, %false_1661 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1636, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_1662 = torch.constant.int 32 %int2_1663 = torch.constant.int 2 %int32_1664 = torch.constant.int 32 %int8_1665 = torch.constant.int 8 %int128_1666 = torch.constant.int 128 %1637 = torch.prim.ListConstruct %416, %int32_1662, %int2_1663, %int32_1664, %int8_1665, %int128_1666 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1638 = torch.aten.view %1636, %1637 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1638, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_1667 = torch.constant.int 2097152 %1639 = torch.prim.ListConstruct %416, %int2097152_1667 : (!torch.int, !torch.int) -> !torch.list<int> %1640 = torch.aten.view %1638, %1639 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1640, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_1668 = torch.constant.int -2 %1641 = torch.aten.unsqueeze %1599, %int-2_1668 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1641, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_1669 = torch.constant.int 4 %int8_1670 = torch.constant.int 8 %int4_1671 = torch.constant.int 4 %int128_1672 = torch.constant.int 128 %1642 = torch.prim.ListConstruct %int4_1669, %1585, %int8_1670, %int4_1671, %int128_1672 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_1673 = torch.constant.bool false %1643 = torch.aten.expand %1641, %1642, %false_1673 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1643, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_1674 = torch.constant.int 0 %1644 = torch.aten.clone %1643, %int0_1674 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1644, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_1675 = torch.constant.int 4 %int32_1676 = torch.constant.int 32 %int128_1677 = torch.constant.int 128 %1645 = torch.prim.ListConstruct %int4_1675, %1585, %int32_1676, %int128_1677 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1646 = 
torch.aten._unsafe_view %1644, %1645 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1646, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_1678 = torch.constant.int -2 %1647 = torch.aten.unsqueeze %1541, %int-2_1678 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1647, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_1679 = torch.constant.int 1 %1648 = torch.aten.size.int %1535, %int1_1679 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_1680 = torch.constant.int 4 %int8_1681 = torch.constant.int 8 %int4_1682 = torch.constant.int 4 %int128_1683 = torch.constant.int 128 %1649 = torch.prim.ListConstruct %int4_1680, %1648, %int8_1681, %int4_1682, %int128_1683 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_1684 = torch.constant.bool false %1650 = torch.aten.expand %1647, %1649, %false_1684 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1650, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_1685 = torch.constant.int 0 %1651 = torch.aten.clone %1650, %int0_1685 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1651, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_1686 = torch.constant.int 4 %int32_1687 = torch.constant.int 32 %int128_1688 = torch.constant.int 128 %1652 = torch.prim.ListConstruct %int4_1686, %1648, %int32_1687, %int128_1688 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1653 = torch.aten._unsafe_view %1651, %1652 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1653, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_1689 = torch.constant.int 1 %int2_1690 = torch.constant.int 2 %1654 = torch.aten.transpose.int %1570, %int1_1689, %int2_1690 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1654, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1691 = torch.constant.int 1 %int2_1692 = torch.constant.int 2 %1655 = torch.aten.transpose.int %1646, %int1_1691, %int2_1692 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1655, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1693 = torch.constant.int 1 %int2_1694 = torch.constant.int 2 %1656 = torch.aten.transpose.int %1653, %int1_1693, %int2_1694 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1656, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_1695 = torch.constant.float 0.000000e+00 %false_1696 = torch.constant.bool false %none_1697 = torch.constant.none %1657:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%1654, %1655, %1656, %float0.000000e00_1695, %false_1696, %320, %none_1697) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, 
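// NOTE: Q (%1654), K (%1655) and V (%1656) are transposed to [4, 32, seq, 128]
// and passed to _scaled_dot_product_flash_attention_for_cpu with zero dropout
// and the additive attention mask %320; result #0 is the attention output and
// result #1 appears to be the auxiliary logsumexp ([4, 32, seq], f32).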
!torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %1657#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1698 = torch.constant.int 1 %int2_1699 = torch.constant.int 2 %1658 = torch.aten.transpose.int %1657#0, %int1_1698, %int2_1699 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1658, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_1700 = torch.constant.int 4 %int4096_1701 = torch.constant.int 4096 %1659 = torch.prim.ListConstruct %int4_1700, %1556, %int4096_1701 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1660 = torch.aten.view %1658, %1659 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1660, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1702 = torch.constant.int -2 %int-1_1703 = torch.constant.int -1 %1661 = torch.aten.transpose.int %59, %int-2_1702, %int-1_1703 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_1704 = torch.constant.int 4 %1662 = torch.aten.mul.int %int4_1704, %1556 : !torch.int, !torch.int -> !torch.int %int4096_1705 = torch.constant.int 4096 %1663 = torch.prim.ListConstruct %1662, %int4096_1705 : (!torch.int, !torch.int) -> !torch.list<int> %1664 = torch.aten.view %1660, %1663 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1664, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1665 = torch.aten.mm %1664, %1661 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1665, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1706 = torch.constant.int 4 %int4096_1707 = torch.constant.int 4096 %1666 = torch.prim.ListConstruct %int4_1706, %1556, %int4096_1707 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1667 = torch.aten.view %1665, %1666 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1667, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_1708 = torch.constant.int 1 %1668 = torch.aten.add.Tensor %1505, %1667, %int1_1708 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1668, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_1709 = torch.constant.int 6 %1669 = torch.prims.convert_element_type %1668, %int6_1709 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1669, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_1710 = torch.constant.int 2 %1670 = torch.aten.pow.Tensor_Scalar %1669, %int2_1710 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1670, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_1711 = torch.constant.int -1 %1671 = torch.prim.ListConstruct %int-1_1711 : (!torch.int) -> !torch.list<int> %true_1712 = torch.constant.bool true %none_1713 = torch.constant.none %1672 = torch.aten.mean.dim %1670, %1671, %true_1712, 
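// NOTE: after the attn_output projection (%59) and the residual add (%1668),
// the ops below compute RMSNorm in f32:
//   y = x * rsqrt(mean(x^2, dim=-1, keepdim) + 1e-5) * norm_weight
// then cast back to f16 — the usual pre-norm in front of the FFN.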
%none_1713 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1672, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_1714 = torch.constant.float 9.9999997473787516E-6 %int1_1715 = torch.constant.int 1 %1673 = torch.aten.add.Scalar %1672, %float9.999990e-06_1714, %int1_1715 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1673, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1674 = torch.aten.rsqrt %1673 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1674, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1675 = torch.aten.mul.Tensor %1669, %1674 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1675, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1676 = torch.aten.mul.Tensor %60, %1675 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1676, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_1716 = torch.constant.int 5 %1677 = torch.prims.convert_element_type %1676, %int5_1716 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1677, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1717 = torch.constant.int -2 %int-1_1718 = torch.constant.int -1 %1678 = torch.aten.transpose.int %61, %int-2_1717, %int-1_1718 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_1719 = torch.constant.int 4 %1679 = torch.aten.mul.int %int4_1719, %294 : !torch.int, !torch.int -> !torch.int %int4096_1720 = torch.constant.int 4096 %1680 = torch.prim.ListConstruct %1679, %int4096_1720 : (!torch.int, !torch.int) -> !torch.list<int> %1681 = torch.aten.view %1677, %1680 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1681, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1682 = torch.aten.mm %1681, %1678 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1682, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_1721 = torch.constant.int 4 %int14336_1722 = torch.constant.int 14336 %1683 = torch.prim.ListConstruct %int4_1721, %294, %int14336_1722 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1684 = torch.aten.view %1682, %1683 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1684, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1685 = torch.aten.silu %1684 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1685, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_1723 = torch.constant.int -2 %int-1_1724 = torch.constant.int -1 %1686 = torch.aten.transpose.int %62, %int-2_1723, %int-1_1724 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_1725 = torch.constant.int 4 %1687 = torch.aten.mul.int %int4_1725, %294 : !torch.int, !torch.int -> 
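// NOTE: SwiGLU feed-forward: ffn_down(silu(ffn_gate(x)) * ffn_up(x)),
// expanding 4096 -> 14336 and back. %1685 is silu(gate); the up projection
// (weight %62) and the elementwise product follow.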
!torch.int %int4096_1726 = torch.constant.int 4096 %1688 = torch.prim.ListConstruct %1687, %int4096_1726 : (!torch.int, !torch.int) -> !torch.list<int> %1689 = torch.aten.view %1677, %1688 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1689, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1690 = torch.aten.mm %1689, %1686 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1690, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_1727 = torch.constant.int 4 %int14336_1728 = torch.constant.int 14336 %1691 = torch.prim.ListConstruct %int4_1727, %294, %int14336_1728 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1692 = torch.aten.view %1690, %1691 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1692, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1693 = torch.aten.mul.Tensor %1685, %1692 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1693, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_1729 = torch.constant.int -2 %int-1_1730 = torch.constant.int -1 %1694 = torch.aten.transpose.int %63, %int-2_1729, %int-1_1730 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_1731 = torch.constant.int 1 %1695 = torch.aten.size.int %1684, %int1_1731 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_1732 = torch.constant.int 4 %1696 = torch.aten.mul.int %int4_1732, %1695 : !torch.int, !torch.int -> !torch.int %int14336_1733 = torch.constant.int 14336 %1697 = torch.prim.ListConstruct %1696, %int14336_1733 : (!torch.int, !torch.int) -> !torch.list<int> %1698 = torch.aten.view %1693, %1697 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1698, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %1699 = torch.aten.mm %1698, %1694 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1699, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1734 = torch.constant.int 4 %int4096_1735 = torch.constant.int 4096 %1700 = torch.prim.ListConstruct %int4_1734, %1695, %int4096_1735 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1701 = torch.aten.view %1699, %1700 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1701, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_1736 = torch.constant.int 1 %1702 = torch.aten.add.Tensor %1668, %1701, %int1_1736 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1702, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_1737 = torch.constant.int 6 %1703 = torch.prims.convert_element_type %1702, %int6_1737 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1703, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_1738 = torch.constant.int 2 %1704 = torch.aten.pow.Tensor_Scalar 
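// NOTE: %1702 closes this transformer block (residual add of the ffn_down
// output onto %1668); the f32 ops below are the next block's input RMSNorm.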
%1703, %int2_1738 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1704, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_1739 = torch.constant.int -1 %1705 = torch.prim.ListConstruct %int-1_1739 : (!torch.int) -> !torch.list<int> %true_1740 = torch.constant.bool true %none_1741 = torch.constant.none %1706 = torch.aten.mean.dim %1704, %1705, %true_1740, %none_1741 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1706, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_1742 = torch.constant.float 9.9999997473787516E-6 %int1_1743 = torch.constant.int 1 %1707 = torch.aten.add.Scalar %1706, %float9.999990e-06_1742, %int1_1743 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1707, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1708 = torch.aten.rsqrt %1707 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1708, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1709 = torch.aten.mul.Tensor %1703, %1708 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1709, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1710 = torch.aten.mul.Tensor %64, %1709 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1710, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_1744 = torch.constant.int 5 %1711 = torch.prims.convert_element_type %1710, %int5_1744 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1711, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1745 = torch.constant.int -2 %int-1_1746 = torch.constant.int -1 %1712 = torch.aten.transpose.int %65, %int-2_1745, %int-1_1746 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_1747 = torch.constant.int 4 %1713 = torch.aten.mul.int %int4_1747, %294 : !torch.int, !torch.int -> !torch.int %int4096_1748 = torch.constant.int 4096 %1714 = torch.prim.ListConstruct %1713, %int4096_1748 : (!torch.int, !torch.int) -> !torch.list<int> %1715 = torch.aten.view %1711, %1714 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1715, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1716 = torch.aten.mm %1715, %1712 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1716, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1749 = torch.constant.int 4 %int4096_1750 = torch.constant.int 4096 %1717 = torch.prim.ListConstruct %int4_1749, %294, %int4096_1750 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1718 = torch.aten.view %1716, %1717 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1718, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1751 = torch.constant.int -2 %int-1_1752 = torch.constant.int -1 %1719 = torch.aten.transpose.int %66, 
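// NOTE: attention projections for the next block: Q (weight %65) is
// 4096 -> 4096 (32 heads x 128), while the K (%66) and V (%67) projections
// below are 4096 -> 1024 (8 KV heads x 128), i.e. 4:1 grouped-query attention.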
%int-2_1751, %int-1_1752 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_1753 = torch.constant.int 4 %1720 = torch.aten.mul.int %int4_1753, %294 : !torch.int, !torch.int -> !torch.int %int4096_1754 = torch.constant.int 4096 %1721 = torch.prim.ListConstruct %1720, %int4096_1754 : (!torch.int, !torch.int) -> !torch.list<int> %1722 = torch.aten.view %1711, %1721 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1722, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1723 = torch.aten.mm %1722, %1719 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1723, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_1755 = torch.constant.int 4 %int1024_1756 = torch.constant.int 1024 %1724 = torch.prim.ListConstruct %int4_1755, %294, %int1024_1756 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1725 = torch.aten.view %1723, %1724 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1725, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_1757 = torch.constant.int -2 %int-1_1758 = torch.constant.int -1 %1726 = torch.aten.transpose.int %67, %int-2_1757, %int-1_1758 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_1759 = torch.constant.int 4 %1727 = torch.aten.mul.int %int4_1759, %294 : !torch.int, !torch.int -> !torch.int %int4096_1760 = torch.constant.int 4096 %1728 = torch.prim.ListConstruct %1727, %int4096_1760 : (!torch.int, !torch.int) -> !torch.list<int> %1729 = torch.aten.view %1711, %1728 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1729, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1730 = torch.aten.mm %1729, %1726 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1730, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_1761 = torch.constant.int 4 %int1024_1762 = torch.constant.int 1024 %1731 = torch.prim.ListConstruct %int4_1761, %294, %int1024_1762 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1732 = torch.aten.view %1730, %1731 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1732, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_1763 = torch.constant.int 4 %int32_1764 = torch.constant.int 32 %int128_1765 = torch.constant.int 128 %1733 = torch.prim.ListConstruct %int4_1763, %294, %int32_1764, %int128_1765 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1734 = torch.aten.view %1718, %1733 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1734, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_1766 = torch.constant.int 4 %int8_1767 = torch.constant.int 8 %int128_1768 = torch.constant.int 128 %1735 = torch.prim.ListConstruct %int4_1766, %294, %int8_1767, %int128_1768 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1736 = torch.aten.view %1725, %1735 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> 
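// NOTE: Q/K/V are reshaped to [batch=4, seq, heads, head_dim=128]; the ops
// that follow build the rotary-embedding table used to rotate Q and K.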
torch.bind_symbolic_shape %1736, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_1769 = torch.constant.int 4 %int8_1770 = torch.constant.int 8 %int128_1771 = torch.constant.int 128 %1737 = torch.prim.ListConstruct %int4_1769, %294, %int8_1770, %int128_1771 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1738 = torch.aten.view %1732, %1737 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1738, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_1772 = torch.constant.int 131072 %none_1773 = torch.constant.none %none_1774 = torch.constant.none %cpu_1775 = torch.constant.device "cpu" %false_1776 = torch.constant.bool false %1739 = torch.aten.arange %int131072_1772, %none_1773, %none_1774, %cpu_1775, %false_1776 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_1777 = torch.constant.int 0 %int128_1778 = torch.constant.int 128 %int2_1779 = torch.constant.int 2 %none_1780 = torch.constant.none %none_1781 = torch.constant.none %cpu_1782 = torch.constant.device "cpu" %false_1783 = torch.constant.bool false %1740 = torch.aten.arange.start_step %int0_1777, %int128_1778, %int2_1779, %none_1780, %none_1781, %cpu_1782, %false_1783 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_1784 = torch.constant.int 0 %int0_1785 = torch.constant.int 0 %int64_1786 = torch.constant.int 64 %int1_1787 = torch.constant.int 1 %1741 = torch.aten.slice.Tensor %1740, %int0_1784, %int0_1785, %int64_1786, %int1_1787 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_1788 = torch.constant.int 6 %1742 = torch.prims.convert_element_type %1741, %int6_1788 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_1789 = torch.constant.int 128 %1743 = torch.aten.div.Scalar %1742, %int128_1789 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_1790 = torch.constant.float 5.000000e+05 %1744 = torch.aten.pow.Scalar %float5.000000e05_1790, %1743 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1745 = torch.aten.reciprocal %1744 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_1791 = torch.constant.float 1.000000e+00 %1746 = torch.aten.mul.Scalar %1745, %float1.000000e00_1791 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_1792 = torch.constant.int 131072 %int1_1793 = torch.constant.int 1 %1747 = torch.prim.ListConstruct %int131072_1792, %int1_1793 : (!torch.int, !torch.int) -> !torch.list<int> %1748 = torch.aten.view %1739, %1747 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1749 = torch.aten.mul.Tensor %1748, %1746 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1750 = torch.aten.cos %1749 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1751 = torch.aten.sin %1749 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1752 = torch.aten.complex %1750, %1751 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_1794 = torch.constant.int 1 %1753 = torch.aten.size.int %1718, %int1_1794 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_1795 = 
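// NOTE: rotary embedding table: inv_freq[i] = 1 / 500000^(2i/128) for
// i in 0..63, angles = position * inv_freq over positions 0..131071, stored as
// complex exponentials cos(a) + i*sin(a) (%1752). A rope theta of 500000 and a
// 131072 context length match Llama-3-style configurations.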
torch.constant.int 0 %1754 = torch.aten.add.int %int0_1795, %1753 : !torch.int, !torch.int -> !torch.int %int0_1796 = torch.constant.int 0 %int0_1797 = torch.constant.int 0 %int1_1798 = torch.constant.int 1 %1755 = torch.aten.slice.Tensor %1752, %int0_1796, %int0_1797, %1754, %int1_1798 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1755, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_1799 = torch.constant.int 1 %int0_1800 = torch.constant.int 0 %int9223372036854775807_1801 = torch.constant.int 9223372036854775807 %int1_1802 = torch.constant.int 1 %1756 = torch.aten.slice.Tensor %1755, %int1_1799, %int0_1800, %int9223372036854775807_1801, %int1_1802 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1756, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_1803 = torch.constant.int 0 %1757 = torch.aten.unsqueeze %1756, %int0_1803 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1757, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_1804 = torch.constant.int 2 %1758 = torch.aten.unsqueeze %1757, %int2_1804 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1758, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_1805 = torch.constant.int 3 %int0_1806 = torch.constant.int 0 %int9223372036854775807_1807 = torch.constant.int 9223372036854775807 %int1_1808 = torch.constant.int 1 %1759 = torch.aten.slice.Tensor %1758, %int3_1805, %int0_1806, %int9223372036854775807_1807, %int1_1808 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1759, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1760 = torch_c.to_builtin_tensor %1734 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_1809 = arith.constant 1 : index %dim_1810 = tensor.dim %1760, %c1_1809 : tensor<4x?x32x128xf16> %1761 = flow.tensor.bitcast %1760 : tensor<4x?x32x128xf16>{%dim_1810} -> tensor<4x?x32x64xcomplex<f16>>{%dim_1810} %1762 = torch_c.from_builtin_tensor %1761 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %1762, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %1763 = torch.aten.mul.Tensor %1762, %1759 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %1763, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %1764 = torch_c.to_builtin_tensor %1763 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_1811 = arith.constant 1 : index %dim_1812 = tensor.dim %1764, %c1_1811 : tensor<4x?x32x64xcomplex<f32>> %1765 = flow.tensor.bitcast %1764 : tensor<4x?x32x64xcomplex<f32>>{%dim_1812} -> tensor<4x?x32x128xf32>{%dim_1812} %1766 = torch_c.from_builtin_tensor %1765 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %1766, [%292], affine_map<()[s0] -> (4, s0 * 32, 
32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_1813 = torch.constant.int 5 %1767 = torch.prims.convert_element_type %1766, %int5_1813 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1767, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_1814 = torch.constant.int 131072 %none_1815 = torch.constant.none %none_1816 = torch.constant.none %cpu_1817 = torch.constant.device "cpu" %false_1818 = torch.constant.bool false %1768 = torch.aten.arange %int131072_1814, %none_1815, %none_1816, %cpu_1817, %false_1818 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_1819 = torch.constant.int 0 %int128_1820 = torch.constant.int 128 %int2_1821 = torch.constant.int 2 %none_1822 = torch.constant.none %none_1823 = torch.constant.none %cpu_1824 = torch.constant.device "cpu" %false_1825 = torch.constant.bool false %1769 = torch.aten.arange.start_step %int0_1819, %int128_1820, %int2_1821, %none_1822, %none_1823, %cpu_1824, %false_1825 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_1826 = torch.constant.int 0 %int0_1827 = torch.constant.int 0 %int64_1828 = torch.constant.int 64 %int1_1829 = torch.constant.int 1 %1770 = torch.aten.slice.Tensor %1769, %int0_1826, %int0_1827, %int64_1828, %int1_1829 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_1830 = torch.constant.int 6 %1771 = torch.prims.convert_element_type %1770, %int6_1830 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_1831 = torch.constant.int 128 %1772 = torch.aten.div.Scalar %1771, %int128_1831 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_1832 = torch.constant.float 5.000000e+05 %1773 = torch.aten.pow.Scalar %float5.000000e05_1832, %1772 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1774 = torch.aten.reciprocal %1773 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_1833 = torch.constant.float 1.000000e+00 %1775 = torch.aten.mul.Scalar %1774, %float1.000000e00_1833 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_1834 = torch.constant.int 131072 %int1_1835 = torch.constant.int 1 %1776 = torch.prim.ListConstruct %int131072_1834, %int1_1835 : (!torch.int, !torch.int) -> !torch.list<int> %1777 = torch.aten.view %1768, %1776 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1778 = torch.aten.mul.Tensor %1777, %1775 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1779 = torch.aten.cos %1778 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1780 = torch.aten.sin %1778 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1781 = torch.aten.complex %1779, %1780 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_1836 = torch.constant.int 1 %1782 = torch.aten.size.int %1725, %int1_1836 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_1837 = torch.constant.int 0 %1783 = torch.aten.add.int %int0_1837, %1782 : !torch.int, !torch.int -> !torch.int %int0_1838 = torch.constant.int 0 %int0_1839 = torch.constant.int 0 %int1_1840 = torch.constant.int 1 %1784 = torch.aten.slice.Tensor %1781, %int0_1838, %int0_1839, %1783, 
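// NOTE: RoPE is applied by bitcasting [..., 128] x f16 to [..., 64] x
// complex<f16> (flow.tensor.bitcast), multiplying by the complex exponentials,
// and bitcasting back; %1767 is the rotated Q. The table is then rebuilt from
// scratch for K — the exporter apparently emits the identical computation
// twice rather than reusing %1752.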
%int1_1840 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1784, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_1841 = torch.constant.int 1 %int0_1842 = torch.constant.int 0 %int9223372036854775807_1843 = torch.constant.int 9223372036854775807 %int1_1844 = torch.constant.int 1 %1785 = torch.aten.slice.Tensor %1784, %int1_1841, %int0_1842, %int9223372036854775807_1843, %int1_1844 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1785, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_1845 = torch.constant.int 0 %1786 = torch.aten.unsqueeze %1785, %int0_1845 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1786, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_1846 = torch.constant.int 2 %1787 = torch.aten.unsqueeze %1786, %int2_1846 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1787, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_1847 = torch.constant.int 3 %int0_1848 = torch.constant.int 0 %int9223372036854775807_1849 = torch.constant.int 9223372036854775807 %int1_1850 = torch.constant.int 1 %1788 = torch.aten.slice.Tensor %1787, %int3_1847, %int0_1848, %int9223372036854775807_1849, %int1_1850 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1788, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1789 = torch_c.to_builtin_tensor %1736 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_1851 = arith.constant 1 : index %dim_1852 = tensor.dim %1789, %c1_1851 : tensor<4x?x8x128xf16> %1790 = flow.tensor.bitcast %1789 : tensor<4x?x8x128xf16>{%dim_1852} -> tensor<4x?x8x64xcomplex<f16>>{%dim_1852} %1791 = torch_c.from_builtin_tensor %1790 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %1791, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %1792 = torch.aten.mul.Tensor %1791, %1788 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %1792, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %1793 = torch_c.to_builtin_tensor %1792 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_1853 = arith.constant 1 : index %dim_1854 = tensor.dim %1793, %c1_1853 : tensor<4x?x8x64xcomplex<f32>> %1794 = flow.tensor.bitcast %1793 : tensor<4x?x8x64xcomplex<f32>>{%dim_1854} -> tensor<4x?x8x128xf32>{%dim_1854} %1795 = torch_c.from_builtin_tensor %1794 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %1795, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_1855 = torch.constant.int 5 %1796 = torch.prims.convert_element_type %1795, %int5_1855 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1796, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 
128)> : !torch.vtensor<[4,?,8,128],f16> %int64_1856 = torch.constant.int 64 %1797 = torch.aten.mul.Scalar %arg2, %int64_1856 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1797, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int14 = torch.constant.int 14 %int1_1857 = torch.constant.int 1 %1798 = torch.aten.add.Scalar %1797, %int14, %int1_1857 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1798, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_1858 = torch.constant.int 4 %int32_1859 = torch.constant.int 32 %int8_1860 = torch.constant.int 8 %int128_1861 = torch.constant.int 128 %1799 = torch.prim.ListConstruct %int4_1858, %425, %int32_1859, %int8_1860, %int128_1861 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1800 = torch.aten.view %1796, %1799 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1800, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_1862 = torch.constant.int 4 %1801 = torch.aten.mul.int %int4_1862, %425 : !torch.int, !torch.int -> !torch.int %int32_1863 = torch.constant.int 32 %int8_1864 = torch.constant.int 8 %int128_1865 = torch.constant.int 128 %1802 = torch.prim.ListConstruct %1801, %int32_1863, %int8_1864, %int128_1865 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1803 = torch.aten.view %1800, %1802 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1803, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_1866 = torch.constant.int 4 %1804 = torch.aten.mul.int %int4_1866, %425 : !torch.int, !torch.int -> !torch.int %1805 = torch.prim.ListConstruct %1804 : (!torch.int) -> !torch.list<int> %1806 = torch.aten.view %1798, %1805 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %1806, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_1867 = torch.constant.int 32 %int2_1868 = torch.constant.int 2 %int32_1869 = torch.constant.int 32 %int8_1870 = torch.constant.int 8 %int128_1871 = torch.constant.int 128 %1807 = torch.prim.ListConstruct %416, %int32_1867, %int2_1868, %int32_1869, %int8_1870, %int128_1871 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1808 = torch.aten.view %1640, %1807 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1808, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_1872 = torch.constant.int 32 %1809 = torch.aten.mul.int %416, %int32_1872 : !torch.int, !torch.int -> !torch.int %int2_1873 = torch.constant.int 2 %1810 = torch.aten.mul.int %1809, %int2_1873 : !torch.int, !torch.int -> !torch.int %int32_1874 = torch.constant.int 32 %int8_1875 = torch.constant.int 8 %int128_1876 = torch.constant.int 128 %1811 = torch.prim.ListConstruct %1810, %int32_1874, %int8_1875, %int128_1876 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1812 = torch.aten.view %1808, %1811 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1812, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : 
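// NOTE: KV-cache slot computation: %1797 = page_id * 64 and %1798 adds this
// block's offset (+14, suggesting slot = page*64 + 2*block for keys, block 7
// here). The rotated K (%1796) is reshaped into 32-token pages and the
// index_put below scatters those pages into the flattened cache.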
!torch.vtensor<[?,32,8,128],f16> %1813 = torch.prim.ListConstruct %1806 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_1877 = torch.constant.bool false %1814 = torch.aten.index_put %1812, %1813, %1803, %false_1877 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1814, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_1878 = torch.constant.int 32 %int2_1879 = torch.constant.int 2 %int32_1880 = torch.constant.int 32 %int8_1881 = torch.constant.int 8 %int128_1882 = torch.constant.int 128 %1815 = torch.prim.ListConstruct %416, %int32_1878, %int2_1879, %int32_1880, %int8_1881, %int128_1882 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1816 = torch.aten.view %1814, %1815 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1816, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_1883 = torch.constant.int 2097152 %1817 = torch.prim.ListConstruct %416, %int2097152_1883 : (!torch.int, !torch.int) -> !torch.list<int> %1818 = torch.aten.view %1816, %1817 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1818, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_1884 = torch.constant.int 32 %int2_1885 = torch.constant.int 2 %int32_1886 = torch.constant.int 32 %int8_1887 = torch.constant.int 8 %int128_1888 = torch.constant.int 128 %1819 = torch.prim.ListConstruct %416, %int32_1884, %int2_1885, %int32_1886, %int8_1887, %int128_1888 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1820 = torch.aten.view %1818, %1819 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1820, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_1889 = torch.constant.int 32 %int8_1890 = torch.constant.int 8 %int128_1891 = torch.constant.int 128 %1821 = torch.prim.ListConstruct %1810, %int32_1889, %int8_1890, %int128_1891 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1822 = torch.aten.view %1820, %1821 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1822, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_1892 = torch.constant.int 4 %int32_1893 = torch.constant.int 32 %int8_1894 = torch.constant.int 8 %int128_1895 = torch.constant.int 128 %1823 = torch.prim.ListConstruct %int4_1892, %425, %int32_1893, %int8_1894, %int128_1895 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1824 = torch.aten.view %1738, %1823 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1824, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_1896 = torch.constant.int 4 %1825 = torch.aten.mul.int %int4_1896, %425 : !torch.int, !torch.int -> !torch.int %int32_1897 = torch.constant.int 32 %int8_1898 = torch.constant.int 8 %int128_1899 = torch.constant.int 128 %1826 = torch.prim.ListConstruct %1825, %int32_1897, %int8_1898, %int128_1899 : (!torch.int, 
!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1827 = torch.aten.view %1824, %1826 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1827, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_1900 = torch.constant.int 1 %int1_1901 = torch.constant.int 1 %1828 = torch.aten.add.Scalar %1798, %int1_1900, %int1_1901 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1828, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_1902 = torch.constant.int 4 %1829 = torch.aten.mul.int %int4_1902, %425 : !torch.int, !torch.int -> !torch.int %1830 = torch.prim.ListConstruct %1829 : (!torch.int) -> !torch.list<int> %1831 = torch.aten.view %1828, %1830 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %1831, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %1832 = torch.prim.ListConstruct %1831 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_1903 = torch.constant.bool false %1833 = torch.aten.index_put %1822, %1832, %1827, %false_1903 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %1833, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_1904 = torch.constant.int 32 %int2_1905 = torch.constant.int 2 %int32_1906 = torch.constant.int 32 %int8_1907 = torch.constant.int 8 %int128_1908 = torch.constant.int 128 %1834 = torch.prim.ListConstruct %416, %int32_1904, %int2_1905, %int32_1906, %int8_1907, %int128_1908 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1835 = torch.aten.view %1833, %1834 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %1835, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_1909 = torch.constant.int 2097152 %1836 = torch.prim.ListConstruct %416, %int2097152_1909 : (!torch.int, !torch.int) -> !torch.list<int> %1837 = torch.aten.view %1835, %1836 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %1837, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_1910 = torch.constant.int -2 %1838 = torch.aten.unsqueeze %1796, %int-2_1910 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1838, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_1911 = torch.constant.int 4 %int8_1912 = torch.constant.int 8 %int4_1913 = torch.constant.int 4 %int128_1914 = torch.constant.int 128 %1839 = torch.prim.ListConstruct %int4_1911, %1782, %int8_1912, %int4_1913, %int128_1914 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_1915 = torch.constant.bool false %1840 = torch.aten.expand %1838, %1839, %false_1915 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1840, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_1916 = torch.constant.int 0 %1841 = torch.aten.clone %1840, %int0_1916 : !torch.vtensor<[4,?,8,4,128],f16>, 
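// NOTE: values go to the odd slot (%1828 = key slot + 1); after the write the
// cache is viewed back to [?, 2097152] (%1837). The unsqueeze/expand ops below
// again broadcast the 8 KV heads to 32 for SDPA, first for K, then for V.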
!torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1841, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_1917 = torch.constant.int 4 %int32_1918 = torch.constant.int 32 %int128_1919 = torch.constant.int 128 %1842 = torch.prim.ListConstruct %int4_1917, %1782, %int32_1918, %int128_1919 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1843 = torch.aten._unsafe_view %1841, %1842 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1843, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_1920 = torch.constant.int -2 %1844 = torch.aten.unsqueeze %1738, %int-2_1920 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %1844, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_1921 = torch.constant.int 1 %1845 = torch.aten.size.int %1732, %int1_1921 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_1922 = torch.constant.int 4 %int8_1923 = torch.constant.int 8 %int4_1924 = torch.constant.int 4 %int128_1925 = torch.constant.int 128 %1846 = torch.prim.ListConstruct %int4_1922, %1845, %int8_1923, %int4_1924, %int128_1925 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_1926 = torch.constant.bool false %1847 = torch.aten.expand %1844, %1846, %false_1926 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1847, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_1927 = torch.constant.int 0 %1848 = torch.aten.clone %1847, %int0_1927 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %1848, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_1928 = torch.constant.int 4 %int32_1929 = torch.constant.int 32 %int128_1930 = torch.constant.int 128 %1849 = torch.prim.ListConstruct %int4_1928, %1845, %int32_1929, %int128_1930 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1850 = torch.aten._unsafe_view %1848, %1849 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1850, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_1931 = torch.constant.int 1 %int2_1932 = torch.constant.int 2 %1851 = torch.aten.transpose.int %1767, %int1_1931, %int2_1932 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1851, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1933 = torch.constant.int 1 %int2_1934 = torch.constant.int 2 %1852 = torch.aten.transpose.int %1843, %int1_1933, %int2_1934 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1852, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1935 = torch.constant.int 1 %int2_1936 = torch.constant.int 2 %1853 = torch.aten.transpose.int %1850, %int1_1935, %int2_1936 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %1853, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : 
!torch.vtensor<[4,32,?,128],f16> %float0.000000e00_1937 = torch.constant.float 0.000000e+00 %false_1938 = torch.constant.bool false %none_1939 = torch.constant.none %1854:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%1851, %1852, %1853, %float0.000000e00_1937, %false_1938, %320, %none_1939) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %1854#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_1940 = torch.constant.int 1 %int2_1941 = torch.constant.int 2 %1855 = torch.aten.transpose.int %1854#0, %int1_1940, %int2_1941 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1855, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_1942 = torch.constant.int 4 %int4096_1943 = torch.constant.int 4096 %1856 = torch.prim.ListConstruct %int4_1942, %1753, %int4096_1943 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1857 = torch.aten.view %1855, %1856 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1857, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1944 = torch.constant.int -2 %int-1_1945 = torch.constant.int -1 %1858 = torch.aten.transpose.int %68, %int-2_1944, %int-1_1945 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_1946 = torch.constant.int 4 %1859 = torch.aten.mul.int %int4_1946, %1753 : !torch.int, !torch.int -> !torch.int %int4096_1947 = torch.constant.int 4096 %1860 = torch.prim.ListConstruct %1859, %int4096_1947 : (!torch.int, !torch.int) -> !torch.list<int> %1861 = torch.aten.view %1857, %1860 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1861, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1862 = torch.aten.mm %1861, %1858 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1862, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1948 = torch.constant.int 4 %int4096_1949 = torch.constant.int 4096 %1863 = torch.prim.ListConstruct %int4_1948, %1753, %int4096_1949 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1864 = torch.aten.view %1862, %1863 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1864, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_1950 = torch.constant.int 1 %1865 = torch.aten.add.Tensor %1702, %1864, %int1_1950 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1865, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_1951 = torch.constant.int 6 %1866 = torch.prims.convert_element_type %1865, %int6_1951 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1866, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_1952 = torch.constant.int 2 %1867 = torch.aten.pow.Tensor_Scalar %1866, 
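// NOTE: attention output path: transpose %1854#0 back to [4, seq, 32, 128],
// flatten the heads to 4096, apply the attn_output weight (%68), and add the
// residual (%1865 = %1702 + %1864); the f32 ops that follow are the FFN
// RMSNorm for this block.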
%int2_1952 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1867, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_1953 = torch.constant.int -1 %1868 = torch.prim.ListConstruct %int-1_1953 : (!torch.int) -> !torch.list<int> %true_1954 = torch.constant.bool true %none_1955 = torch.constant.none %1869 = torch.aten.mean.dim %1867, %1868, %true_1954, %none_1955 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1869, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_1956 = torch.constant.float 9.9999997473787516E-6 %int1_1957 = torch.constant.int 1 %1870 = torch.aten.add.Scalar %1869, %float9.999990e-06_1956, %int1_1957 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1870, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1871 = torch.aten.rsqrt %1870 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1871, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1872 = torch.aten.mul.Tensor %1866, %1871 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1872, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1873 = torch.aten.mul.Tensor %69, %1872 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1873, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_1958 = torch.constant.int 5 %1874 = torch.prims.convert_element_type %1873, %int5_1958 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1874, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1959 = torch.constant.int -2 %int-1_1960 = torch.constant.int -1 %1875 = torch.aten.transpose.int %70, %int-2_1959, %int-1_1960 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_1961 = torch.constant.int 4 %1876 = torch.aten.mul.int %int4_1961, %294 : !torch.int, !torch.int -> !torch.int %int4096_1962 = torch.constant.int 4096 %1877 = torch.prim.ListConstruct %1876, %int4096_1962 : (!torch.int, !torch.int) -> !torch.list<int> %1878 = torch.aten.view %1874, %1877 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1878, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1879 = torch.aten.mm %1878, %1875 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1879, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_1963 = torch.constant.int 4 %int14336_1964 = torch.constant.int 14336 %1880 = torch.prim.ListConstruct %int4_1963, %294, %int14336_1964 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1881 = torch.aten.view %1879, %1880 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1881, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1882 = torch.aten.silu %1881 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> 
torch.bind_symbolic_shape %1882, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_1965 = torch.constant.int -2 %int-1_1966 = torch.constant.int -1 %1883 = torch.aten.transpose.int %71, %int-2_1965, %int-1_1966 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_1967 = torch.constant.int 4 %1884 = torch.aten.mul.int %int4_1967, %294 : !torch.int, !torch.int -> !torch.int %int4096_1968 = torch.constant.int 4096 %1885 = torch.prim.ListConstruct %1884, %int4096_1968 : (!torch.int, !torch.int) -> !torch.list<int> %1886 = torch.aten.view %1874, %1885 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1886, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1887 = torch.aten.mm %1886, %1883 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1887, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_1969 = torch.constant.int 4 %int14336_1970 = torch.constant.int 14336 %1888 = torch.prim.ListConstruct %int4_1969, %294, %int14336_1970 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1889 = torch.aten.view %1887, %1888 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1889, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %1890 = torch.aten.mul.Tensor %1882, %1889 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %1890, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_1971 = torch.constant.int -2 %int-1_1972 = torch.constant.int -1 %1891 = torch.aten.transpose.int %72, %int-2_1971, %int-1_1972 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_1973 = torch.constant.int 1 %1892 = torch.aten.size.int %1881, %int1_1973 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_1974 = torch.constant.int 4 %1893 = torch.aten.mul.int %int4_1974, %1892 : !torch.int, !torch.int -> !torch.int %int14336_1975 = torch.constant.int 14336 %1894 = torch.prim.ListConstruct %1893, %int14336_1975 : (!torch.int, !torch.int) -> !torch.list<int> %1895 = torch.aten.view %1890, %1894 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %1895, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %1896 = torch.aten.mm %1895, %1891 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1896, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1976 = torch.constant.int 4 %int4096_1977 = torch.constant.int 4096 %1897 = torch.prim.ListConstruct %int4_1976, %1892, %int4096_1977 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1898 = torch.aten.view %1896, %1897 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1898, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_1978 = torch.constant.int 1 %1899 = torch.aten.add.Tensor %1865, %1898, %int1_1978 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape 
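// NOTE: %1899 is this block's output (residual + ffn_down(silu(gate) * up));
// the remaining ops start the next block: RMSNorm (weight %73) followed by the
// Q (%74), K (%75) and V (%76) projections, with the section truncating
// mid-way through the Q head reshape (%1931).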
%1899, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_1979 = torch.constant.int 6 %1900 = torch.prims.convert_element_type %1899, %int6_1979 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1900, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_1980 = torch.constant.int 2 %1901 = torch.aten.pow.Tensor_Scalar %1900, %int2_1980 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1901, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_1981 = torch.constant.int -1 %1902 = torch.prim.ListConstruct %int-1_1981 : (!torch.int) -> !torch.list<int> %true_1982 = torch.constant.bool true %none_1983 = torch.constant.none %1903 = torch.aten.mean.dim %1901, %1902, %true_1982, %none_1983 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1903, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_1984 = torch.constant.float 9.9999997473787516E-6 %int1_1985 = torch.constant.int 1 %1904 = torch.aten.add.Scalar %1903, %float9.999990e-06_1984, %int1_1985 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1904, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1905 = torch.aten.rsqrt %1904 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %1905, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %1906 = torch.aten.mul.Tensor %1900, %1905 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1906, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %1907 = torch.aten.mul.Tensor %73, %1906 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %1907, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_1986 = torch.constant.int 5 %1908 = torch.prims.convert_element_type %1907, %int5_1986 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1908, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1987 = torch.constant.int -2 %int-1_1988 = torch.constant.int -1 %1909 = torch.aten.transpose.int %74, %int-2_1987, %int-1_1988 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_1989 = torch.constant.int 4 %1910 = torch.aten.mul.int %int4_1989, %294 : !torch.int, !torch.int -> !torch.int %int4096_1990 = torch.constant.int 4096 %1911 = torch.prim.ListConstruct %1910, %int4096_1990 : (!torch.int, !torch.int) -> !torch.list<int> %1912 = torch.aten.view %1908, %1911 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1912, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1913 = torch.aten.mm %1912, %1909 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1913, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_1991 = torch.constant.int 4 %int4096_1992 = torch.constant.int 4096 %1914 = 
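// RMSNorm in f32 over the residual stream: %1903 = mean(x^2, dim=-1, keepdim),
// %1905 = rsqrt(%1903 + 9.9999997e-6), %1907 = w * (x * %1905) with
// w = %73 : [4096] f32, then cast back to f16 (%1908). %1913 is the query
// projection of the normalized activations through the [4096,4096] f16
// weight %74.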
torch.prim.ListConstruct %int4_1991, %294, %int4096_1992 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1915 = torch.aten.view %1913, %1914 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %1915, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_1993 = torch.constant.int -2 %int-1_1994 = torch.constant.int -1 %1916 = torch.aten.transpose.int %75, %int-2_1993, %int-1_1994 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_1995 = torch.constant.int 4 %1917 = torch.aten.mul.int %int4_1995, %294 : !torch.int, !torch.int -> !torch.int %int4096_1996 = torch.constant.int 4096 %1918 = torch.prim.ListConstruct %1917, %int4096_1996 : (!torch.int, !torch.int) -> !torch.list<int> %1919 = torch.aten.view %1908, %1918 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1919, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1920 = torch.aten.mm %1919, %1916 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1920, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_1997 = torch.constant.int 4 %int1024_1998 = torch.constant.int 1024 %1921 = torch.prim.ListConstruct %int4_1997, %294, %int1024_1998 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1922 = torch.aten.view %1920, %1921 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1922, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_1999 = torch.constant.int -2 %int-1_2000 = torch.constant.int -1 %1923 = torch.aten.transpose.int %76, %int-2_1999, %int-1_2000 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_2001 = torch.constant.int 4 %1924 = torch.aten.mul.int %int4_2001, %294 : !torch.int, !torch.int -> !torch.int %int4096_2002 = torch.constant.int 4096 %1925 = torch.prim.ListConstruct %1924, %int4096_2002 : (!torch.int, !torch.int) -> !torch.list<int> %1926 = torch.aten.view %1908, %1925 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %1926, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %1927 = torch.aten.mm %1926, %1923 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %1927, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_2003 = torch.constant.int 4 %int1024_2004 = torch.constant.int 1024 %1928 = torch.prim.ListConstruct %int4_2003, %294, %int1024_2004 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %1929 = torch.aten.view %1927, %1928 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %1929, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_2005 = torch.constant.int 4 %int32_2006 = torch.constant.int 32 %int128_2007 = torch.constant.int 128 %1930 = torch.prim.ListConstruct %int4_2005, %294, %int32_2006, %int128_2007 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1931 = torch.aten.view %1915, %1930 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1931, 
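// Key/value projections: %75 and %76 are [1024,4096] f16 weights, so
// K (%1922) and V (%1929) come out at width 1024 = 8 heads x 128,
// versus 4096 = 32 heads x 128 for Q (%1915) -- the grouped-query
// attention layout used throughout this module.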
[%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_2008 = torch.constant.int 4 %int8_2009 = torch.constant.int 8 %int128_2010 = torch.constant.int 128 %1932 = torch.prim.ListConstruct %int4_2008, %294, %int8_2009, %int128_2010 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1933 = torch.aten.view %1922, %1932 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1933, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_2011 = torch.constant.int 4 %int8_2012 = torch.constant.int 8 %int128_2013 = torch.constant.int 128 %1934 = torch.prim.ListConstruct %int4_2011, %294, %int8_2012, %int128_2013 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1935 = torch.aten.view %1929, %1934 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1935, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_2014 = torch.constant.int 131072 %none_2015 = torch.constant.none %none_2016 = torch.constant.none %cpu_2017 = torch.constant.device "cpu" %false_2018 = torch.constant.bool false %1936 = torch.aten.arange %int131072_2014, %none_2015, %none_2016, %cpu_2017, %false_2018 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_2019 = torch.constant.int 0 %int128_2020 = torch.constant.int 128 %int2_2021 = torch.constant.int 2 %none_2022 = torch.constant.none %none_2023 = torch.constant.none %cpu_2024 = torch.constant.device "cpu" %false_2025 = torch.constant.bool false %1937 = torch.aten.arange.start_step %int0_2019, %int128_2020, %int2_2021, %none_2022, %none_2023, %cpu_2024, %false_2025 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_2026 = torch.constant.int 0 %int0_2027 = torch.constant.int 0 %int64_2028 = torch.constant.int 64 %int1_2029 = torch.constant.int 1 %1938 = torch.aten.slice.Tensor %1937, %int0_2026, %int0_2027, %int64_2028, %int1_2029 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_2030 = torch.constant.int 6 %1939 = torch.prims.convert_element_type %1938, %int6_2030 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_2031 = torch.constant.int 128 %1940 = torch.aten.div.Scalar %1939, %int128_2031 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_2032 = torch.constant.float 5.000000e+05 %1941 = torch.aten.pow.Scalar %float5.000000e05_2032, %1940 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1942 = torch.aten.reciprocal %1941 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_2033 = torch.constant.float 1.000000e+00 %1943 = torch.aten.mul.Scalar %1942, %float1.000000e00_2033 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_2034 = torch.constant.int 131072 %int1_2035 = torch.constant.int 1 %1944 = torch.prim.ListConstruct %int131072_2034, %int1_2035 : (!torch.int, !torch.int) -> !torch.list<int> %1945 = torch.aten.view %1936, %1944 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1946 = torch.aten.mul.Tensor %1945, %1943 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1947 = torch.aten.cos %1946 : 
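// Rotary embedding table: inv_freq[i] = 1 / 500000^(2i/128) for i in [0,64)
// (%1942), and %1946 = positions[0..131072) x inv_freq, an outer product of
// shape [131072,64]. Its cos/sin are combined into a complex table below.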
!torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1948 = torch.aten.sin %1946 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1949 = torch.aten.complex %1947, %1948 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_2036 = torch.constant.int 1 %1950 = torch.aten.size.int %1915, %int1_2036 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_2037 = torch.constant.int 0 %1951 = torch.aten.add.int %int0_2037, %1950 : !torch.int, !torch.int -> !torch.int %int0_2038 = torch.constant.int 0 %int0_2039 = torch.constant.int 0 %int1_2040 = torch.constant.int 1 %1952 = torch.aten.slice.Tensor %1949, %int0_2038, %int0_2039, %1951, %int1_2040 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1952, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_2041 = torch.constant.int 1 %int0_2042 = torch.constant.int 0 %int9223372036854775807_2043 = torch.constant.int 9223372036854775807 %int1_2044 = torch.constant.int 1 %1953 = torch.aten.slice.Tensor %1952, %int1_2041, %int0_2042, %int9223372036854775807_2043, %int1_2044 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1953, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_2045 = torch.constant.int 0 %1954 = torch.aten.unsqueeze %1953, %int0_2045 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1954, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_2046 = torch.constant.int 2 %1955 = torch.aten.unsqueeze %1954, %int2_2046 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1955, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_2047 = torch.constant.int 3 %int0_2048 = torch.constant.int 0 %int9223372036854775807_2049 = torch.constant.int 9223372036854775807 %int1_2050 = torch.constant.int 1 %1956 = torch.aten.slice.Tensor %1955, %int3_2047, %int0_2048, %int9223372036854775807_2049, %int1_2050 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1956, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1957 = torch_c.to_builtin_tensor %1931 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_2051 = arith.constant 1 : index %dim_2052 = tensor.dim %1957, %c1_2051 : tensor<4x?x32x128xf16> %1958 = flow.tensor.bitcast %1957 : tensor<4x?x32x128xf16>{%dim_2052} -> tensor<4x?x32x64xcomplex<f16>>{%dim_2052} %1959 = torch_c.from_builtin_tensor %1958 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %1959, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %1960 = torch.aten.mul.Tensor %1959, %1956 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %1960, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %1961 = torch_c.to_builtin_tensor %1960 : 
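// Rotation applied via a complex bitcast: the 128-wide Q head dim is
// reinterpreted as 64 complex<f16> pairs (%1958/%1959) and multiplied by the
// [1,seq,1,64] complex table sliced to the current length (%1956), which
// performs the RoPE rotation in one elementwise op.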
!torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_2053 = arith.constant 1 : index %dim_2054 = tensor.dim %1961, %c1_2053 : tensor<4x?x32x64xcomplex<f32>> %1962 = flow.tensor.bitcast %1961 : tensor<4x?x32x64xcomplex<f32>>{%dim_2054} -> tensor<4x?x32x128xf32>{%dim_2054} %1963 = torch_c.from_builtin_tensor %1962 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %1963, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_2055 = torch.constant.int 5 %1964 = torch.prims.convert_element_type %1963, %int5_2055 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %1964, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_2056 = torch.constant.int 131072 %none_2057 = torch.constant.none %none_2058 = torch.constant.none %cpu_2059 = torch.constant.device "cpu" %false_2060 = torch.constant.bool false %1965 = torch.aten.arange %int131072_2056, %none_2057, %none_2058, %cpu_2059, %false_2060 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_2061 = torch.constant.int 0 %int128_2062 = torch.constant.int 128 %int2_2063 = torch.constant.int 2 %none_2064 = torch.constant.none %none_2065 = torch.constant.none %cpu_2066 = torch.constant.device "cpu" %false_2067 = torch.constant.bool false %1966 = torch.aten.arange.start_step %int0_2061, %int128_2062, %int2_2063, %none_2064, %none_2065, %cpu_2066, %false_2067 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_2068 = torch.constant.int 0 %int0_2069 = torch.constant.int 0 %int64_2070 = torch.constant.int 64 %int1_2071 = torch.constant.int 1 %1967 = torch.aten.slice.Tensor %1966, %int0_2068, %int0_2069, %int64_2070, %int1_2071 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_2072 = torch.constant.int 6 %1968 = torch.prims.convert_element_type %1967, %int6_2072 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_2073 = torch.constant.int 128 %1969 = torch.aten.div.Scalar %1968, %int128_2073 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_2074 = torch.constant.float 5.000000e+05 %1970 = torch.aten.pow.Scalar %float5.000000e05_2074, %1969 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %1971 = torch.aten.reciprocal %1970 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_2075 = torch.constant.float 1.000000e+00 %1972 = torch.aten.mul.Scalar %1971, %float1.000000e00_2075 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_2076 = torch.constant.int 131072 %int1_2077 = torch.constant.int 1 %1973 = torch.prim.ListConstruct %int131072_2076, %int1_2077 : (!torch.int, !torch.int) -> !torch.list<int> %1974 = torch.aten.view %1965, %1973 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %1975 = torch.aten.mul.Tensor %1974, %1972 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %1976 = torch.aten.cos %1975 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1977 = torch.aten.sin %1975 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %1978 = torch.aten.complex %1976, %1977 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> 
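// %1964 is the rotated Q in f16. The full 131072-position frequency table is
// then rebuilt from scratch for the key path (%1965-%1978); the computation
// is identical to the one above and presumably folds away after CSE.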
-> !torch.vtensor<[131072,64],complex<f32>> %int1_2078 = torch.constant.int 1 %1979 = torch.aten.size.int %1922, %int1_2078 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_2079 = torch.constant.int 0 %1980 = torch.aten.add.int %int0_2079, %1979 : !torch.int, !torch.int -> !torch.int %int0_2080 = torch.constant.int 0 %int0_2081 = torch.constant.int 0 %int1_2082 = torch.constant.int 1 %1981 = torch.aten.slice.Tensor %1978, %int0_2080, %int0_2081, %1980, %int1_2082 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1981, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_2083 = torch.constant.int 1 %int0_2084 = torch.constant.int 0 %int9223372036854775807_2085 = torch.constant.int 9223372036854775807 %int1_2086 = torch.constant.int 1 %1982 = torch.aten.slice.Tensor %1981, %int1_2083, %int0_2084, %int9223372036854775807_2085, %int1_2086 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %1982, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_2087 = torch.constant.int 0 %1983 = torch.aten.unsqueeze %1982, %int0_2087 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %1983, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_2088 = torch.constant.int 2 %1984 = torch.aten.unsqueeze %1983, %int2_2088 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1984, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_2089 = torch.constant.int 3 %int0_2090 = torch.constant.int 0 %int9223372036854775807_2091 = torch.constant.int 9223372036854775807 %int1_2092 = torch.constant.int 1 %1985 = torch.aten.slice.Tensor %1984, %int3_2089, %int0_2090, %int9223372036854775807_2091, %int1_2092 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %1985, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %1986 = torch_c.to_builtin_tensor %1933 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_2093 = arith.constant 1 : index %dim_2094 = tensor.dim %1986, %c1_2093 : tensor<4x?x8x128xf16> %1987 = flow.tensor.bitcast %1986 : tensor<4x?x8x128xf16>{%dim_2094} -> tensor<4x?x8x64xcomplex<f16>>{%dim_2094} %1988 = torch_c.from_builtin_tensor %1987 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %1988, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %1989 = torch.aten.mul.Tensor %1988, %1985 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %1989, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %1990 = torch_c.to_builtin_tensor %1989 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_2095 = arith.constant 1 : index %dim_2096 = tensor.dim %1990, %c1_2095 : tensor<4x?x8x64xcomplex<f32>> %1991 = flow.tensor.bitcast %1990 : tensor<4x?x8x64xcomplex<f32>>{%dim_2096} -> tensor<4x?x8x128xf32>{%dim_2096} 
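// Same complex-multiply rotation for K, on the 8-KV-head layout:
// [4,?,8,128] f16 is viewed as [4,?,8,64] complex<f16> (%1988) and scaled by
// the sliced table %1985.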
%1992 = torch_c.from_builtin_tensor %1991 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %1992, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_2097 = torch.constant.int 5 %1993 = torch.prims.convert_element_type %1992, %int5_2097 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %1993, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_2098 = torch.constant.int 64 %1994 = torch.aten.mul.Scalar %arg2, %int64_2098 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1994, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int16 = torch.constant.int 16 %int1_2099 = torch.constant.int 1 %1995 = torch.aten.add.Scalar %1994, %int16, %int1_2099 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %1995, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_2100 = torch.constant.int 4 %int32_2101 = torch.constant.int 32 %int8_2102 = torch.constant.int 8 %int128_2103 = torch.constant.int 128 %1996 = torch.prim.ListConstruct %int4_2100, %425, %int32_2101, %int8_2102, %int128_2103 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %1997 = torch.aten.view %1993, %1996 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %1997, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_2104 = torch.constant.int 4 %1998 = torch.aten.mul.int %int4_2104, %425 : !torch.int, !torch.int -> !torch.int %int32_2105 = torch.constant.int 32 %int8_2106 = torch.constant.int 8 %int128_2107 = torch.constant.int 128 %1999 = torch.prim.ListConstruct %1998, %int32_2105, %int8_2106, %int128_2107 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2000 = torch.aten.view %1997, %1999 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2000, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_2108 = torch.constant.int 4 %2001 = torch.aten.mul.int %int4_2108, %425 : !torch.int, !torch.int -> !torch.int %2002 = torch.prim.ListConstruct %2001 : (!torch.int) -> !torch.list<int> %2003 = torch.aten.view %1995, %2002 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2003, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_2109 = torch.constant.int 32 %int2_2110 = torch.constant.int 2 %int32_2111 = torch.constant.int 32 %int8_2112 = torch.constant.int 8 %int128_2113 = torch.constant.int 128 %2004 = torch.prim.ListConstruct %416, %int32_2109, %int2_2110, %int32_2111, %int8_2112, %int128_2113 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2005 = torch.aten.view %1837, %2004 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2005, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_2114 = torch.constant.int 32 %2006 = torch.aten.mul.int %416, %int32_2114 : !torch.int, !torch.int -> !torch.int %int2_2115 = torch.constant.int 2 %2007 = torch.aten.mul.int %2006, %int2_2115 : !torch.int, !torch.int -> !torch.int %int32_2116 = 
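// Paged KV-cache write for K: %arg2 carries page ids, so %1995 = page_id*64 + 16
// selects this layer's K sub-block. Each cache page of 2097152 elements
// factors as 32 layers x {K,V} x 32 tokens x 8 heads x 128, i.e. 64
// sub-blocks per page; offset 16 = 2*8 would make this block 8's K slot
// (inferred from the strides, not stated in the IR). %1997 regroups the
// rotated keys into 32-token pages matching that layout.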
torch.constant.int 32 %int8_2117 = torch.constant.int 8 %int128_2118 = torch.constant.int 128 %2008 = torch.prim.ListConstruct %2007, %int32_2116, %int8_2117, %int128_2118 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2009 = torch.aten.view %2005, %2008 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2009, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %2010 = torch.prim.ListConstruct %2003 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_2119 = torch.constant.bool false %2011 = torch.aten.index_put %2009, %2010, %2000, %false_2119 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2011, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_2120 = torch.constant.int 32 %int2_2121 = torch.constant.int 2 %int32_2122 = torch.constant.int 32 %int8_2123 = torch.constant.int 8 %int128_2124 = torch.constant.int 128 %2012 = torch.prim.ListConstruct %416, %int32_2120, %int2_2121, %int32_2122, %int8_2123, %int128_2124 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2013 = torch.aten.view %2011, %2012 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2013, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_2125 = torch.constant.int 2097152 %2014 = torch.prim.ListConstruct %416, %int2097152_2125 : (!torch.int, !torch.int) -> !torch.list<int> %2015 = torch.aten.view %2013, %2014 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2015, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_2126 = torch.constant.int 32 %int2_2127 = torch.constant.int 2 %int32_2128 = torch.constant.int 32 %int8_2129 = torch.constant.int 8 %int128_2130 = torch.constant.int 128 %2016 = torch.prim.ListConstruct %416, %int32_2126, %int2_2127, %int32_2128, %int8_2129, %int128_2130 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2017 = torch.aten.view %2015, %2016 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2017, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_2131 = torch.constant.int 32 %int8_2132 = torch.constant.int 8 %int128_2133 = torch.constant.int 128 %2018 = torch.prim.ListConstruct %2007, %int32_2131, %int8_2132, %int128_2133 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2019 = torch.aten.view %2017, %2018 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2019, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_2134 = torch.constant.int 4 %int32_2135 = torch.constant.int 32 %int8_2136 = torch.constant.int 8 %int128_2137 = torch.constant.int 128 %2020 = torch.prim.ListConstruct %int4_2134, %425, %int32_2135, %int8_2136, %int128_2137 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2021 = torch.aten.view %1935, %2020 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> 
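// %2011 scatters the new K rows into the flattened cache with aten.index_put
// (indices %2003, accumulate=false), and %2015 restores the opaque
// [?,2097152] cache view. The cache is immediately re-expanded
// (%2017/%2019) to receive the V rows.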
!torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2021, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_2138 = torch.constant.int 4 %2022 = torch.aten.mul.int %int4_2138, %425 : !torch.int, !torch.int -> !torch.int %int32_2139 = torch.constant.int 32 %int8_2140 = torch.constant.int 8 %int128_2141 = torch.constant.int 128 %2023 = torch.prim.ListConstruct %2022, %int32_2139, %int8_2140, %int128_2141 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2024 = torch.aten.view %2021, %2023 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2024, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_2142 = torch.constant.int 1 %int1_2143 = torch.constant.int 1 %2025 = torch.aten.add.Scalar %1995, %int1_2142, %int1_2143 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2025, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_2144 = torch.constant.int 4 %2026 = torch.aten.mul.int %int4_2144, %425 : !torch.int, !torch.int -> !torch.int %2027 = torch.prim.ListConstruct %2026 : (!torch.int) -> !torch.list<int> %2028 = torch.aten.view %2025, %2027 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2028, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %2029 = torch.prim.ListConstruct %2028 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_2145 = torch.constant.bool false %2030 = torch.aten.index_put %2019, %2029, %2024, %false_2145 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2030, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_2146 = torch.constant.int 32 %int2_2147 = torch.constant.int 2 %int32_2148 = torch.constant.int 32 %int8_2149 = torch.constant.int 8 %int128_2150 = torch.constant.int 128 %2031 = torch.prim.ListConstruct %416, %int32_2146, %int2_2147, %int32_2148, %int8_2149, %int128_2150 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2032 = torch.aten.view %2030, %2031 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2032, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_2151 = torch.constant.int 2097152 %2033 = torch.prim.ListConstruct %416, %int2097152_2151 : (!torch.int, !torch.int) -> !torch.list<int> %2034 = torch.aten.view %2032, %2033 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2034, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_2152 = torch.constant.int -2 %2035 = torch.aten.unsqueeze %1993, %int-2_2152 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2035, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_2153 = torch.constant.int 4 %int8_2154 = torch.constant.int 8 %int4_2155 = torch.constant.int 4 %int128_2156 = torch.constant.int 128 %2036 = torch.prim.ListConstruct %int4_2153, %1979, %int8_2154, %int4_2155, %int128_2156 : (!torch.int, !torch.int, !torch.int, 
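// The V write mirrors the K write at slot offset +1 (%2025 = page_id*64 + 17),
// storing the un-rotated values %1935; the cache is then folded back to
// [?,2097152] (%2034). %2035 begins expanding the 8 rotated K heads for
// attention.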
!torch.int, !torch.int) -> !torch.list<int> %false_2157 = torch.constant.bool false %2037 = torch.aten.expand %2035, %2036, %false_2157 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2037, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_2158 = torch.constant.int 0 %2038 = torch.aten.clone %2037, %int0_2158 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2038, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_2159 = torch.constant.int 4 %int32_2160 = torch.constant.int 32 %int128_2161 = torch.constant.int 128 %2039 = torch.prim.ListConstruct %int4_2159, %1979, %int32_2160, %int128_2161 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2040 = torch.aten._unsafe_view %2038, %2039 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2040, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_2162 = torch.constant.int -2 %2041 = torch.aten.unsqueeze %1935, %int-2_2162 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2041, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_2163 = torch.constant.int 1 %2042 = torch.aten.size.int %1929, %int1_2163 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_2164 = torch.constant.int 4 %int8_2165 = torch.constant.int 8 %int4_2166 = torch.constant.int 4 %int128_2167 = torch.constant.int 128 %2043 = torch.prim.ListConstruct %int4_2164, %2042, %int8_2165, %int4_2166, %int128_2167 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_2168 = torch.constant.bool false %2044 = torch.aten.expand %2041, %2043, %false_2168 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2044, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_2169 = torch.constant.int 0 %2045 = torch.aten.clone %2044, %int0_2169 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2045, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_2170 = torch.constant.int 4 %int32_2171 = torch.constant.int 32 %int128_2172 = torch.constant.int 128 %2046 = torch.prim.ListConstruct %int4_2170, %2042, %int32_2171, %int128_2172 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2047 = torch.aten._unsafe_view %2045, %2046 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2047, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_2173 = torch.constant.int 1 %int2_2174 = torch.constant.int 2 %2048 = torch.aten.transpose.int %1964, %int1_2173, %int2_2174 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2048, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2175 = torch.constant.int 1 %int2_2176 = torch.constant.int 2 %2049 = torch.aten.transpose.int %2040, %int1_2175, %int2_2176 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int 
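// Grouped-query expansion: each of the 8 KV heads is unsqueezed, broadcast 4x
// (expand + clone), and re-viewed as 32 heads (%2040 for K, %2047 for V) so
// that Q, K, V all present [4, 32 heads, seq, 128] to the attention kernel
// after the transposes below.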
-> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2049, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2177 = torch.constant.int 1 %int2_2178 = torch.constant.int 2 %2050 = torch.aten.transpose.int %2047, %int1_2177, %int2_2178 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2050, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_2179 = torch.constant.float 0.000000e+00 %false_2180 = torch.constant.bool false %none_2181 = torch.constant.none %2051:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%2048, %2049, %2050, %float0.000000e00_2179, %false_2180, %320, %none_2181) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %2051#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2182 = torch.constant.int 1 %int2_2183 = torch.constant.int 2 %2052 = torch.aten.transpose.int %2051#0, %int1_2182, %int2_2183 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2052, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_2184 = torch.constant.int 4 %int4096_2185 = torch.constant.int 4096 %2053 = torch.prim.ListConstruct %int4_2184, %1950, %int4096_2185 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2054 = torch.aten.view %2052, %2053 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2054, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2186 = torch.constant.int -2 %int-1_2187 = torch.constant.int -1 %2055 = torch.aten.transpose.int %77, %int-2_2186, %int-1_2187 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_2188 = torch.constant.int 4 %2056 = torch.aten.mul.int %int4_2188, %1950 : !torch.int, !torch.int -> !torch.int %int4096_2189 = torch.constant.int 4096 %2057 = torch.prim.ListConstruct %2056, %int4096_2189 : (!torch.int, !torch.int) -> !torch.list<int> %2058 = torch.aten.view %2054, %2057 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2058, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2059 = torch.aten.mm %2058, %2055 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2059, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2190 = torch.constant.int 4 %int4096_2191 = torch.constant.int 4096 %2060 = torch.prim.ListConstruct %int4_2190, %1950, %int4096_2191 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2061 = torch.aten.view %2059, %2060 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2061, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_2192 = torch.constant.int 1 %2062 = torch.aten.add.Tensor %1899, %2061, %int1_2192 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> 
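// Attention itself is one fused call to
// _scaled_dot_product_flash_attention_for_cpu with dropout 0.0, is_causal
// false, an explicit [4,1,?,?] f16 mask (%320), and the default scale.
// The result is transposed back, flattened to [4,?,4096] (%2054), sent
// through the [4096,4096] output projection %77, and added to the residual
// stream (%2062 = %1899 + attn_out).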
torch.bind_symbolic_shape %2062, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_2193 = torch.constant.int 6 %2063 = torch.prims.convert_element_type %2062, %int6_2193 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2063, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_2194 = torch.constant.int 2 %2064 = torch.aten.pow.Tensor_Scalar %2063, %int2_2194 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2064, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_2195 = torch.constant.int -1 %2065 = torch.prim.ListConstruct %int-1_2195 : (!torch.int) -> !torch.list<int> %true_2196 = torch.constant.bool true %none_2197 = torch.constant.none %2066 = torch.aten.mean.dim %2064, %2065, %true_2196, %none_2197 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2066, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_2198 = torch.constant.float 9.9999997473787516E-6 %int1_2199 = torch.constant.int 1 %2067 = torch.aten.add.Scalar %2066, %float9.999990e-06_2198, %int1_2199 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2067, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2068 = torch.aten.rsqrt %2067 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2068, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2069 = torch.aten.mul.Tensor %2063, %2068 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2069, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2070 = torch.aten.mul.Tensor %78, %2069 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2070, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_2200 = torch.constant.int 5 %2071 = torch.prims.convert_element_type %2070, %int5_2200 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2071, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2201 = torch.constant.int -2 %int-1_2202 = torch.constant.int -1 %2072 = torch.aten.transpose.int %79, %int-2_2201, %int-1_2202 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_2203 = torch.constant.int 4 %2073 = torch.aten.mul.int %int4_2203, %294 : !torch.int, !torch.int -> !torch.int %int4096_2204 = torch.constant.int 4096 %2074 = torch.prim.ListConstruct %2073, %int4096_2204 : (!torch.int, !torch.int) -> !torch.list<int> %2075 = torch.aten.view %2071, %2074 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2075, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2076 = torch.aten.mm %2075, %2072 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2076, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_2205 = torch.constant.int 4 %int14336_2206 = torch.constant.int 
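// Post-attention RMSNorm (same eps and f32 upcast as above, weight %78),
// followed by the FFN gate projection: %2076 = norm(x) @ W^T with
// W = %79 : [14336,4096] f16.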
14336 %2077 = torch.prim.ListConstruct %int4_2205, %294, %int14336_2206 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2078 = torch.aten.view %2076, %2077 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2078, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2079 = torch.aten.silu %2078 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2079, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_2207 = torch.constant.int -2 %int-1_2208 = torch.constant.int -1 %2080 = torch.aten.transpose.int %80, %int-2_2207, %int-1_2208 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_2209 = torch.constant.int 4 %2081 = torch.aten.mul.int %int4_2209, %294 : !torch.int, !torch.int -> !torch.int %int4096_2210 = torch.constant.int 4096 %2082 = torch.prim.ListConstruct %2081, %int4096_2210 : (!torch.int, !torch.int) -> !torch.list<int> %2083 = torch.aten.view %2071, %2082 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2083, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2084 = torch.aten.mm %2083, %2080 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2084, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_2211 = torch.constant.int 4 %int14336_2212 = torch.constant.int 14336 %2085 = torch.prim.ListConstruct %int4_2211, %294, %int14336_2212 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2086 = torch.aten.view %2084, %2085 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2086, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2087 = torch.aten.mul.Tensor %2079, %2086 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2087, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_2213 = torch.constant.int -2 %int-1_2214 = torch.constant.int -1 %2088 = torch.aten.transpose.int %81, %int-2_2213, %int-1_2214 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_2215 = torch.constant.int 1 %2089 = torch.aten.size.int %2078, %int1_2215 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_2216 = torch.constant.int 4 %2090 = torch.aten.mul.int %int4_2216, %2089 : !torch.int, !torch.int -> !torch.int %int14336_2217 = torch.constant.int 14336 %2091 = torch.prim.ListConstruct %2090, %int14336_2217 : (!torch.int, !torch.int) -> !torch.list<int> %2092 = torch.aten.view %2087, %2091 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2092, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %2093 = torch.aten.mm %2092, %2088 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2093, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2218 = torch.constant.int 4 %int4096_2219 = torch.constant.int 4096 %2094 = torch.prim.ListConstruct %int4_2218, %2089, %int4096_2219 : (!torch.int, !torch.int, !torch.int) -> 
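// SwiGLU again: %2087 = silu(%2078) * %2086 (gate and up projections of the
// same normalized input), then the [4096,14336] down projection %81 maps it
// back to model width (%2093).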
!torch.list<int> %2095 = torch.aten.view %2093, %2094 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2095, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_2220 = torch.constant.int 1 %2096 = torch.aten.add.Tensor %2062, %2095, %int1_2220 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2096, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_2221 = torch.constant.int 6 %2097 = torch.prims.convert_element_type %2096, %int6_2221 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2097, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_2222 = torch.constant.int 2 %2098 = torch.aten.pow.Tensor_Scalar %2097, %int2_2222 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2098, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_2223 = torch.constant.int -1 %2099 = torch.prim.ListConstruct %int-1_2223 : (!torch.int) -> !torch.list<int> %true_2224 = torch.constant.bool true %none_2225 = torch.constant.none %2100 = torch.aten.mean.dim %2098, %2099, %true_2224, %none_2225 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2100, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_2226 = torch.constant.float 9.9999997473787516E-6 %int1_2227 = torch.constant.int 1 %2101 = torch.aten.add.Scalar %2100, %float9.999990e-06_2226, %int1_2227 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2101, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2102 = torch.aten.rsqrt %2101 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2102, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2103 = torch.aten.mul.Tensor %2097, %2102 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2103, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2104 = torch.aten.mul.Tensor %82, %2103 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2104, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_2228 = torch.constant.int 5 %2105 = torch.prims.convert_element_type %2104, %int5_2228 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2105, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2229 = torch.constant.int -2 %int-1_2230 = torch.constant.int -1 %2106 = torch.aten.transpose.int %83, %int-2_2229, %int-1_2230 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_2231 = torch.constant.int 4 %2107 = torch.aten.mul.int %int4_2231, %294 : !torch.int, !torch.int -> !torch.int %int4096_2232 = torch.constant.int 4096 %2108 = torch.prim.ListConstruct %2107, %int4096_2232 : (!torch.int, !torch.int) -> !torch.list<int> %2109 = torch.aten.view %2105, %2108 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> 
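// %2096 closes this transformer block (second residual add). The next block
// opens identically: RMSNorm with weight %82 producing %2105, which feeds
// straight into the following block's Q projection (%83).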
!torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2109, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2110 = torch.aten.mm %2109, %2106 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2110, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2233 = torch.constant.int 4 %int4096_2234 = torch.constant.int 4096 %2111 = torch.prim.ListConstruct %int4_2233, %294, %int4096_2234 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2112 = torch.aten.view %2110, %2111 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2112, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2235 = torch.constant.int -2 %int-1_2236 = torch.constant.int -1 %2113 = torch.aten.transpose.int %84, %int-2_2235, %int-1_2236 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_2237 = torch.constant.int 4 %2114 = torch.aten.mul.int %int4_2237, %294 : !torch.int, !torch.int -> !torch.int %int4096_2238 = torch.constant.int 4096 %2115 = torch.prim.ListConstruct %2114, %int4096_2238 : (!torch.int, !torch.int) -> !torch.list<int> %2116 = torch.aten.view %2105, %2115 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2116, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2117 = torch.aten.mm %2116, %2113 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2117, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_2239 = torch.constant.int 4 %int1024_2240 = torch.constant.int 1024 %2118 = torch.prim.ListConstruct %int4_2239, %294, %int1024_2240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2119 = torch.aten.view %2117, %2118 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %2119, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_2241 = torch.constant.int -2 %int-1_2242 = torch.constant.int -1 %2120 = torch.aten.transpose.int %85, %int-2_2241, %int-1_2242 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_2243 = torch.constant.int 4 %2121 = torch.aten.mul.int %int4_2243, %294 : !torch.int, !torch.int -> !torch.int %int4096_2244 = torch.constant.int 4096 %2122 = torch.prim.ListConstruct %2121, %int4096_2244 : (!torch.int, !torch.int) -> !torch.list<int> %2123 = torch.aten.view %2105, %2122 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2123, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2124 = torch.aten.mm %2123, %2120 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2124, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_2245 = torch.constant.int 4 %int1024_2246 = torch.constant.int 1024 %2125 = torch.prim.ListConstruct %int4_2245, %294, %int1024_2246 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2126 = torch.aten.view %2124, %2125 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %2126, [%292], affine_map<()[s0] -> (4, s0 * 
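// Q/K/V projections for the next block: %2112 (width 4096, 32 heads), %2119
// and %2126 (width 1024, 8 KV heads), from weights %83, %84, %85 -- the same
// GQA pattern as above.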
32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_2247 = torch.constant.int 4 %int32_2248 = torch.constant.int 32 %int128_2249 = torch.constant.int 128 %2127 = torch.prim.ListConstruct %int4_2247, %294, %int32_2248, %int128_2249 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2128 = torch.aten.view %2112, %2127 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2128, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_2250 = torch.constant.int 4 %int8_2251 = torch.constant.int 8 %int128_2252 = torch.constant.int 128 %2129 = torch.prim.ListConstruct %int4_2250, %294, %int8_2251, %int128_2252 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2130 = torch.aten.view %2119, %2129 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2130, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_2253 = torch.constant.int 4 %int8_2254 = torch.constant.int 8 %int128_2255 = torch.constant.int 128 %2131 = torch.prim.ListConstruct %int4_2253, %294, %int8_2254, %int128_2255 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2132 = torch.aten.view %2126, %2131 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2132, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_2256 = torch.constant.int 131072 %none_2257 = torch.constant.none %none_2258 = torch.constant.none %cpu_2259 = torch.constant.device "cpu" %false_2260 = torch.constant.bool false %2133 = torch.aten.arange %int131072_2256, %none_2257, %none_2258, %cpu_2259, %false_2260 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_2261 = torch.constant.int 0 %int128_2262 = torch.constant.int 128 %int2_2263 = torch.constant.int 2 %none_2264 = torch.constant.none %none_2265 = torch.constant.none %cpu_2266 = torch.constant.device "cpu" %false_2267 = torch.constant.bool false %2134 = torch.aten.arange.start_step %int0_2261, %int128_2262, %int2_2263, %none_2264, %none_2265, %cpu_2266, %false_2267 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_2268 = torch.constant.int 0 %int0_2269 = torch.constant.int 0 %int64_2270 = torch.constant.int 64 %int1_2271 = torch.constant.int 1 %2135 = torch.aten.slice.Tensor %2134, %int0_2268, %int0_2269, %int64_2270, %int1_2271 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_2272 = torch.constant.int 6 %2136 = torch.prims.convert_element_type %2135, %int6_2272 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_2273 = torch.constant.int 128 %2137 = torch.aten.div.Scalar %2136, %int128_2273 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_2274 = torch.constant.float 5.000000e+05 %2138 = torch.aten.pow.Scalar %float5.000000e05_2274, %2137 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2139 = torch.aten.reciprocal %2138 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_2275 = torch.constant.float 1.000000e+00 %2140 = torch.aten.mul.Scalar %2139, %float1.000000e00_2275 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_2276 = 
torch.constant.int 131072 %int1_2277 = torch.constant.int 1 %2141 = torch.prim.ListConstruct %int131072_2276, %int1_2277 : (!torch.int, !torch.int) -> !torch.list<int> %2142 = torch.aten.view %2133, %2141 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %2143 = torch.aten.mul.Tensor %2142, %2140 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2144 = torch.aten.cos %2143 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2145 = torch.aten.sin %2143 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2146 = torch.aten.complex %2144, %2145 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_2278 = torch.constant.int 1 %2147 = torch.aten.size.int %2112, %int1_2278 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_2279 = torch.constant.int 0 %2148 = torch.aten.add.int %int0_2279, %2147 : !torch.int, !torch.int -> !torch.int %int0_2280 = torch.constant.int 0 %int0_2281 = torch.constant.int 0 %int1_2282 = torch.constant.int 1 %2149 = torch.aten.slice.Tensor %2146, %int0_2280, %int0_2281, %2148, %int1_2282 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2149, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_2283 = torch.constant.int 1 %int0_2284 = torch.constant.int 0 %int9223372036854775807_2285 = torch.constant.int 9223372036854775807 %int1_2286 = torch.constant.int 1 %2150 = torch.aten.slice.Tensor %2149, %int1_2283, %int0_2284, %int9223372036854775807_2285, %int1_2286 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2150, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_2287 = torch.constant.int 0 %2151 = torch.aten.unsqueeze %2150, %int0_2287 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2151, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_2288 = torch.constant.int 2 %2152 = torch.aten.unsqueeze %2151, %int2_2288 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2152, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_2289 = torch.constant.int 3 %int0_2290 = torch.constant.int 0 %int9223372036854775807_2291 = torch.constant.int 9223372036854775807 %int1_2292 = torch.constant.int 1 %2153 = torch.aten.slice.Tensor %2152, %int3_2289, %int0_2290, %int9223372036854775807_2291, %int1_2292 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2153, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %2154 = torch_c.to_builtin_tensor %2128 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_2293 = arith.constant 1 : index %dim_2294 = tensor.dim %2154, %c1_2293 : tensor<4x?x32x128xf16> %2155 = flow.tensor.bitcast %2154 : tensor<4x?x32x128xf16>{%dim_2294} -> tensor<4x?x32x64xcomplex<f16>>{%dim_2294} %2156 = torch_c.from_builtin_tensor %2155 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> 
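// The rotary table is recomputed once more for this block's Q path, and
// %2149-%2156 slice it to the current sequence length and reinterpret Q's
// 128-wide heads as 64 complex<f16> pairs, exactly as in the previous block.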
torch.bind_symbolic_shape %2156, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %2157 = torch.aten.mul.Tensor %2156, %2153 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %2157, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %2158 = torch_c.to_builtin_tensor %2157 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_2295 = arith.constant 1 : index %dim_2296 = tensor.dim %2158, %c1_2295 : tensor<4x?x32x64xcomplex<f32>> %2159 = flow.tensor.bitcast %2158 : tensor<4x?x32x64xcomplex<f32>>{%dim_2296} -> tensor<4x?x32x128xf32>{%dim_2296} %2160 = torch_c.from_builtin_tensor %2159 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %2160, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_2297 = torch.constant.int 5 %2161 = torch.prims.convert_element_type %2160, %int5_2297 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2161, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_2298 = torch.constant.int 131072 %none_2299 = torch.constant.none %none_2300 = torch.constant.none %cpu_2301 = torch.constant.device "cpu" %false_2302 = torch.constant.bool false %2162 = torch.aten.arange %int131072_2298, %none_2299, %none_2300, %cpu_2301, %false_2302 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_2303 = torch.constant.int 0 %int128_2304 = torch.constant.int 128 %int2_2305 = torch.constant.int 2 %none_2306 = torch.constant.none %none_2307 = torch.constant.none %cpu_2308 = torch.constant.device "cpu" %false_2309 = torch.constant.bool false %2163 = torch.aten.arange.start_step %int0_2303, %int128_2304, %int2_2305, %none_2306, %none_2307, %cpu_2308, %false_2309 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_2310 = torch.constant.int 0 %int0_2311 = torch.constant.int 0 %int64_2312 = torch.constant.int 64 %int1_2313 = torch.constant.int 1 %2164 = torch.aten.slice.Tensor %2163, %int0_2310, %int0_2311, %int64_2312, %int1_2313 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_2314 = torch.constant.int 6 %2165 = torch.prims.convert_element_type %2164, %int6_2314 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_2315 = torch.constant.int 128 %2166 = torch.aten.div.Scalar %2165, %int128_2315 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_2316 = torch.constant.float 5.000000e+05 %2167 = torch.aten.pow.Scalar %float5.000000e05_2316, %2166 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2168 = torch.aten.reciprocal %2167 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_2317 = torch.constant.float 1.000000e+00 %2169 = torch.aten.mul.Scalar %2168, %float1.000000e00_2317 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_2318 = torch.constant.int 131072 %int1_2319 = torch.constant.int 1 %2170 = torch.prim.ListConstruct %int131072_2318, %int1_2319 : (!torch.int, !torch.int) -> !torch.list<int> %2171 = torch.aten.view %2162, %2170 : !torch.vtensor<[131072],si64>, !torch.list<int> -> 
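// NOTE: applying RoPE to Q. flow.tensor.bitcast reinterprets adjacent f16
// pairs of the 128-dim head as 64 complex numbers; one complex multiply with
// the broadcast [1,s,1,64] table rotates each pair, and a second bitcast
// restores the real layout before the cast back to f16. A hedged PyTorch
// equivalent (promoted to f32, which is what the IR's multiply produces):
//
//   rot = table[:seq].view(1, seq, 1, 64)                  # complex64
//   q_c = torch.view_as_complex(q.float().reshape(4, seq, 32, 64, 2))
//   q = torch.view_as_real(q_c * rot).reshape(4, seq, 32, 128).half()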
!torch.vtensor<[131072,1],si64> %2172 = torch.aten.mul.Tensor %2171, %2169 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2173 = torch.aten.cos %2172 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2174 = torch.aten.sin %2172 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2175 = torch.aten.complex %2173, %2174 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_2320 = torch.constant.int 1 %2176 = torch.aten.size.int %2119, %int1_2320 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_2321 = torch.constant.int 0 %2177 = torch.aten.add.int %int0_2321, %2176 : !torch.int, !torch.int -> !torch.int %int0_2322 = torch.constant.int 0 %int0_2323 = torch.constant.int 0 %int1_2324 = torch.constant.int 1 %2178 = torch.aten.slice.Tensor %2175, %int0_2322, %int0_2323, %2177, %int1_2324 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2178, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_2325 = torch.constant.int 1 %int0_2326 = torch.constant.int 0 %int9223372036854775807_2327 = torch.constant.int 9223372036854775807 %int1_2328 = torch.constant.int 1 %2179 = torch.aten.slice.Tensor %2178, %int1_2325, %int0_2326, %int9223372036854775807_2327, %int1_2328 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2179, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_2329 = torch.constant.int 0 %2180 = torch.aten.unsqueeze %2179, %int0_2329 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2180, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_2330 = torch.constant.int 2 %2181 = torch.aten.unsqueeze %2180, %int2_2330 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2181, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_2331 = torch.constant.int 3 %int0_2332 = torch.constant.int 0 %int9223372036854775807_2333 = torch.constant.int 9223372036854775807 %int1_2334 = torch.constant.int 1 %2182 = torch.aten.slice.Tensor %2181, %int3_2331, %int0_2332, %int9223372036854775807_2333, %int1_2334 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2182, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %2183 = torch_c.to_builtin_tensor %2130 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_2335 = arith.constant 1 : index %dim_2336 = tensor.dim %2183, %c1_2335 : tensor<4x?x8x128xf16> %2184 = flow.tensor.bitcast %2183 : tensor<4x?x8x128xf16>{%dim_2336} -> tensor<4x?x8x64xcomplex<f16>>{%dim_2336} %2185 = torch_c.from_builtin_tensor %2184 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %2185, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %2186 = torch.aten.mul.Tensor %2185, %2182 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> 
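// NOTE: the same table construction and complex-multiply rotation now repeat
// for K ([4,s,8,128], the 8 KV heads); only the head count differs:
//
//   k_c = torch.view_as_complex(k.float().reshape(4, seq, 8, 64, 2))
//   k = torch.view_as_real(k_c * rot).reshape(4, seq, 8, 128).half()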
!torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %2186, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %2187 = torch_c.to_builtin_tensor %2186 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_2337 = arith.constant 1 : index %dim_2338 = tensor.dim %2187, %c1_2337 : tensor<4x?x8x64xcomplex<f32>> %2188 = flow.tensor.bitcast %2187 : tensor<4x?x8x64xcomplex<f32>>{%dim_2338} -> tensor<4x?x8x128xf32>{%dim_2338} %2189 = torch_c.from_builtin_tensor %2188 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %2189, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_2339 = torch.constant.int 5 %2190 = torch.prims.convert_element_type %2189, %int5_2339 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2190, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_2340 = torch.constant.int 64 %2191 = torch.aten.mul.Scalar %arg2, %int64_2340 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2191, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int18 = torch.constant.int 18 %int1_2341 = torch.constant.int 1 %2192 = torch.aten.add.Scalar %2191, %int18, %int1_2341 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2192, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_2342 = torch.constant.int 4 %int32_2343 = torch.constant.int 32 %int8_2344 = torch.constant.int 8 %int128_2345 = torch.constant.int 128 %2193 = torch.prim.ListConstruct %int4_2342, %425, %int32_2343, %int8_2344, %int128_2345 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2194 = torch.aten.view %2190, %2193 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2194, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_2346 = torch.constant.int 4 %2195 = torch.aten.mul.int %int4_2346, %425 : !torch.int, !torch.int -> !torch.int %int32_2347 = torch.constant.int 32 %int8_2348 = torch.constant.int 8 %int128_2349 = torch.constant.int 128 %2196 = torch.prim.ListConstruct %2195, %int32_2347, %int8_2348, %int128_2349 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2197 = torch.aten.view %2194, %2196 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2197, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_2350 = torch.constant.int 4 %2198 = torch.aten.mul.int %int4_2350, %425 : !torch.int, !torch.int -> !torch.int %2199 = torch.prim.ListConstruct %2198 : (!torch.int) -> !torch.list<int> %2200 = torch.aten.view %2192, %2199 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2200, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_2351 = torch.constant.int 32 %int2_2352 = torch.constant.int 2 %int32_2353 = torch.constant.int 32 %int8_2354 = torch.constant.int 8 %int128_2355 = torch.constant.int 128 %2201 = torch.prim.ListConstruct %416, %int32_2351, %int2_2352, %int32_2353, %int8_2354, %int128_2355 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2202 = torch.aten.view 
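// NOTE: %2191/%2192 compute write slots for the paged KV cache: %arg2 holds
// page ids, and slot = page_id * 64 + 18. One plausible reading (an
// assumption, inferred from the [?,32,2,32,8,128] cache view formed here):
// pages are laid out as [page, layer(32), k_or_v(2), token(32), kv_head(8),
// head_dim(128)], so flattening the first three dims gives
// slot = page*64 + 2*layer + kv, and 18 = 2*9 selects the K plane of layer 9:
//
//   k_slot = page_ids * 64 + 2 * layer     # layer = 9 here (assumed)
//   v_slot = k_slot + 1                    # V plane sits next to K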
%2034, %2201 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2202, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_2356 = torch.constant.int 32 %2203 = torch.aten.mul.int %416, %int32_2356 : !torch.int, !torch.int -> !torch.int %int2_2357 = torch.constant.int 2 %2204 = torch.aten.mul.int %2203, %int2_2357 : !torch.int, !torch.int -> !torch.int %int32_2358 = torch.constant.int 32 %int8_2359 = torch.constant.int 8 %int128_2360 = torch.constant.int 128 %2205 = torch.prim.ListConstruct %2204, %int32_2358, %int8_2359, %int128_2360 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2206 = torch.aten.view %2202, %2205 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2206, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %2207 = torch.prim.ListConstruct %2200 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_2361 = torch.constant.bool false %2208 = torch.aten.index_put %2206, %2207, %2197, %false_2361 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2208, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_2362 = torch.constant.int 32 %int2_2363 = torch.constant.int 2 %int32_2364 = torch.constant.int 32 %int8_2365 = torch.constant.int 8 %int128_2366 = torch.constant.int 128 %2209 = torch.prim.ListConstruct %416, %int32_2362, %int2_2363, %int32_2364, %int8_2365, %int128_2366 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2210 = torch.aten.view %2208, %2209 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2210, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_2367 = torch.constant.int 2097152 %2211 = torch.prim.ListConstruct %416, %int2097152_2367 : (!torch.int, !torch.int) -> !torch.list<int> %2212 = torch.aten.view %2210, %2211 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2212, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_2368 = torch.constant.int 32 %int2_2369 = torch.constant.int 2 %int32_2370 = torch.constant.int 32 %int8_2371 = torch.constant.int 8 %int128_2372 = torch.constant.int 128 %2213 = torch.prim.ListConstruct %416, %int32_2368, %int2_2369, %int32_2370, %int8_2371, %int128_2372 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2214 = torch.aten.view %2212, %2213 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2214, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_2373 = torch.constant.int 32 %int8_2374 = torch.constant.int 8 %int128_2375 = torch.constant.int 128 %2215 = torch.prim.ListConstruct %2204, %int32_2373, %int8_2374, %int128_2375 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2216 = torch.aten.view %2214, %2215 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2216, [%293], affine_map<()[s0] 
-> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_2376 = torch.constant.int 4 %int32_2377 = torch.constant.int 32 %int8_2378 = torch.constant.int 8 %int128_2379 = torch.constant.int 128 %2217 = torch.prim.ListConstruct %int4_2376, %425, %int32_2377, %int8_2378, %int128_2379 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2218 = torch.aten.view %2132, %2217 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2218, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_2380 = torch.constant.int 4 %2219 = torch.aten.mul.int %int4_2380, %425 : !torch.int, !torch.int -> !torch.int %int32_2381 = torch.constant.int 32 %int8_2382 = torch.constant.int 8 %int128_2383 = torch.constant.int 128 %2220 = torch.prim.ListConstruct %2219, %int32_2381, %int8_2382, %int128_2383 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2221 = torch.aten.view %2218, %2220 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2221, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_2384 = torch.constant.int 1 %int1_2385 = torch.constant.int 1 %2222 = torch.aten.add.Scalar %2192, %int1_2384, %int1_2385 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2222, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_2386 = torch.constant.int 4 %2223 = torch.aten.mul.int %int4_2386, %425 : !torch.int, !torch.int -> !torch.int %2224 = torch.prim.ListConstruct %2223 : (!torch.int) -> !torch.list<int> %2225 = torch.aten.view %2222, %2224 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2225, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %2226 = torch.prim.ListConstruct %2225 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_2387 = torch.constant.bool false %2227 = torch.aten.index_put %2216, %2226, %2221, %false_2387 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2227, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_2388 = torch.constant.int 32 %int2_2389 = torch.constant.int 2 %int32_2390 = torch.constant.int 32 %int8_2391 = torch.constant.int 8 %int128_2392 = torch.constant.int 128 %2228 = torch.prim.ListConstruct %416, %int32_2388, %int2_2389, %int32_2390, %int8_2391, %int128_2392 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2229 = torch.aten.view %2227, %2228 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2229, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_2393 = torch.constant.int 2097152 %2230 = torch.prim.ListConstruct %416, %int2097152_2393 : (!torch.int, !torch.int) -> !torch.list<int> %2231 = torch.aten.view %2229, %2230 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2231, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_2394 = torch.constant.int -2 %2232 = torch.aten.unsqueeze %2190, %int-2_2394 : 
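// NOTE: the cache update itself is a flatten + scatter + restore round trip:
// view the flat [pages, 2097152] buffer as slot-major, aten.index_put the K
// and V rows at their slots (accumulate = false, i.e. plain overwrite), then
// view back to the flat page buffer. A minimal sketch of the same dataflow,
// under the layout assumed above:
//
//   cache = cache.view(-1, 32, 2, 32, 8, 128).reshape(-1, 32, 8, 128)
//   cache[k_slot] = k.reshape(-1, 32, 8, 128)   # K scatter
//   cache[v_slot] = v.reshape(-1, 32, 8, 128)   # V scatter at slot + 1
//   cache = cache.reshape(-1, 2097152)          # back to flat pages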
!torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2232, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_2395 = torch.constant.int 4 %int8_2396 = torch.constant.int 8 %int4_2397 = torch.constant.int 4 %int128_2398 = torch.constant.int 128 %2233 = torch.prim.ListConstruct %int4_2395, %2176, %int8_2396, %int4_2397, %int128_2398 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_2399 = torch.constant.bool false %2234 = torch.aten.expand %2232, %2233, %false_2399 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2234, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_2400 = torch.constant.int 0 %2235 = torch.aten.clone %2234, %int0_2400 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2235, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_2401 = torch.constant.int 4 %int32_2402 = torch.constant.int 32 %int128_2403 = torch.constant.int 128 %2236 = torch.prim.ListConstruct %int4_2401, %2176, %int32_2402, %int128_2403 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2237 = torch.aten._unsafe_view %2235, %2236 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2237, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_2404 = torch.constant.int -2 %2238 = torch.aten.unsqueeze %2132, %int-2_2404 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2238, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_2405 = torch.constant.int 1 %2239 = torch.aten.size.int %2126, %int1_2405 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_2406 = torch.constant.int 4 %int8_2407 = torch.constant.int 8 %int4_2408 = torch.constant.int 4 %int128_2409 = torch.constant.int 128 %2240 = torch.prim.ListConstruct %int4_2406, %2239, %int8_2407, %int4_2408, %int128_2409 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_2410 = torch.constant.bool false %2241 = torch.aten.expand %2238, %2240, %false_2410 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2241, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_2411 = torch.constant.int 0 %2242 = torch.aten.clone %2241, %int0_2411 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2242, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_2412 = torch.constant.int 4 %int32_2413 = torch.constant.int 32 %int128_2414 = torch.constant.int 128 %2243 = torch.prim.ListConstruct %int4_2412, %2239, %int32_2413, %int128_2414 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2244 = torch.aten._unsafe_view %2242, %2243 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2244, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_2415 = torch.constant.int 1 %int2_2416 = torch.constant.int 
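// NOTE: unsqueeze -> expand -> clone -> _unsafe_view is the standard
// "repeat_kv" idiom: each of the 8 KV heads is duplicated 4x so K and V
// match the 32 query heads before attention. A minimal PyTorch sketch:
//
//   def repeat_kv(x, n_rep=4):                  # x: [4, s, 8, 128]
//       b, s, h, d = x.shape
//       x = x[:, :, :, None, :].expand(b, s, h, n_rep, d)
//       return x.reshape(b, s, h * n_rep, d)    # -> [4, s, 32, 128]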
2 %2245 = torch.aten.transpose.int %2161, %int1_2415, %int2_2416 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2245, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2417 = torch.constant.int 1 %int2_2418 = torch.constant.int 2 %2246 = torch.aten.transpose.int %2237, %int1_2417, %int2_2418 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2246, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2419 = torch.constant.int 1 %int2_2420 = torch.constant.int 2 %2247 = torch.aten.transpose.int %2244, %int1_2419, %int2_2420 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2247, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_2421 = torch.constant.float 0.000000e+00 %false_2422 = torch.constant.bool false %none_2423 = torch.constant.none %2248:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%2245, %2246, %2247, %float0.000000e00_2421, %false_2422, %320, %none_2423) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %2248#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2424 = torch.constant.int 1 %int2_2425 = torch.constant.int 2 %2249 = torch.aten.transpose.int %2248#0, %int1_2424, %int2_2425 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2249, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_2426 = torch.constant.int 4 %int4096_2427 = torch.constant.int 4096 %2250 = torch.prim.ListConstruct %int4_2426, %2147, %int4096_2427 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2251 = torch.aten.view %2249, %2250 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2251, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2428 = torch.constant.int -2 %int-1_2429 = torch.constant.int -1 %2252 = torch.aten.transpose.int %86, %int-2_2428, %int-1_2429 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_2430 = torch.constant.int 4 %2253 = torch.aten.mul.int %int4_2430, %2147 : !torch.int, !torch.int -> !torch.int %int4096_2431 = torch.constant.int 4096 %2254 = torch.prim.ListConstruct %2253, %int4096_2431 : (!torch.int, !torch.int) -> !torch.list<int> %2255 = torch.aten.view %2251, %2254 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2255, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2256 = torch.aten.mm %2255, %2252 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2256, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2432 = torch.constant.int 4 %int4096_2433 = torch.constant.int 4096 %2257 = torch.prim.ListConstruct %int4_2432, %2147, %int4096_2433 : (!torch.int, !torch.int, !torch.int) 
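// NOTE: after transposing Q/K/V to [4,32,s,128], attention is one fused op
// (_scaled_dot_product_flash_attention_for_cpu) with dropout 0.0, is_causal
// false, and an explicit [4,1,s,s] mask (%320); its second result (the
// logsumexp) is not consumed in this excerpt. The transpose + view + mm that
// follow are the output projection. A hedged PyTorch sketch:
//
//   import torch.nn.functional as F
//   q, k, v = (t.transpose(1, 2) for t in (q, k, v))    # [4, 32, s, 128]
//   out = F.scaled_dot_product_attention(q, k, v, attn_mask=mask)
//   out = out.transpose(1, 2).reshape(4, seq, 4096) @ w_o.T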
-> !torch.list<int> %2258 = torch.aten.view %2256, %2257 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2258, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_2434 = torch.constant.int 1 %2259 = torch.aten.add.Tensor %2096, %2258, %int1_2434 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2259, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_2435 = torch.constant.int 6 %2260 = torch.prims.convert_element_type %2259, %int6_2435 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2260, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_2436 = torch.constant.int 2 %2261 = torch.aten.pow.Tensor_Scalar %2260, %int2_2436 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2261, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_2437 = torch.constant.int -1 %2262 = torch.prim.ListConstruct %int-1_2437 : (!torch.int) -> !torch.list<int> %true_2438 = torch.constant.bool true %none_2439 = torch.constant.none %2263 = torch.aten.mean.dim %2261, %2262, %true_2438, %none_2439 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2263, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_2440 = torch.constant.float 9.9999997473787516E-6 %int1_2441 = torch.constant.int 1 %2264 = torch.aten.add.Scalar %2263, %float9.999990e-06_2440, %int1_2441 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2264, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2265 = torch.aten.rsqrt %2264 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2265, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2266 = torch.aten.mul.Tensor %2260, %2265 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2266, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2267 = torch.aten.mul.Tensor %87, %2266 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2267, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_2442 = torch.constant.int 5 %2268 = torch.prims.convert_element_type %2267, %int5_2442 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2268, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2443 = torch.constant.int -2 %int-1_2444 = torch.constant.int -1 %2269 = torch.aten.transpose.int %88, %int-2_2443, %int-1_2444 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_2445 = torch.constant.int 4 %2270 = torch.aten.mul.int %int4_2445, %294 : !torch.int, !torch.int -> !torch.int %int4096_2446 = torch.constant.int 4096 %2271 = torch.prim.ListConstruct %2270, %int4096_2446 : (!torch.int, !torch.int) -> !torch.list<int> %2272 = torch.aten.view %2268, %2271 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> 
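// NOTE: %2259 is the post-attention residual add; the ops after it are
// RMSNorm computed in f32 (mean of squares, + eps ~= 1e-5, rsqrt, scale by
// the f32 norm weight) and a cast back to f16 as the FFN input. A minimal
// sketch:
//
//   def rms_norm(x, w, eps=1e-5):               # eps matches 9.9999997e-6
//       x = x.float()
//       x = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps)
//       return (w * x).half()
//
//   h = h + attn_out                            # residual
//   ffn_in = rms_norm(h, ffn_norm_weight)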
!torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2272, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2273 = torch.aten.mm %2272, %2269 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2273, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_2447 = torch.constant.int 4 %int14336_2448 = torch.constant.int 14336 %2274 = torch.prim.ListConstruct %int4_2447, %294, %int14336_2448 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2275 = torch.aten.view %2273, %2274 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2275, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2276 = torch.aten.silu %2275 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2276, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_2449 = torch.constant.int -2 %int-1_2450 = torch.constant.int -1 %2277 = torch.aten.transpose.int %89, %int-2_2449, %int-1_2450 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_2451 = torch.constant.int 4 %2278 = torch.aten.mul.int %int4_2451, %294 : !torch.int, !torch.int -> !torch.int %int4096_2452 = torch.constant.int 4096 %2279 = torch.prim.ListConstruct %2278, %int4096_2452 : (!torch.int, !torch.int) -> !torch.list<int> %2280 = torch.aten.view %2268, %2279 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2280, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2281 = torch.aten.mm %2280, %2277 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2281, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_2453 = torch.constant.int 4 %int14336_2454 = torch.constant.int 14336 %2282 = torch.prim.ListConstruct %int4_2453, %294, %int14336_2454 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2283 = torch.aten.view %2281, %2282 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2283, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2284 = torch.aten.mul.Tensor %2276, %2283 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2284, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_2455 = torch.constant.int -2 %int-1_2456 = torch.constant.int -1 %2285 = torch.aten.transpose.int %90, %int-2_2455, %int-1_2456 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_2457 = torch.constant.int 1 %2286 = torch.aten.size.int %2275, %int1_2457 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_2458 = torch.constant.int 4 %2287 = torch.aten.mul.int %int4_2458, %2286 : !torch.int, !torch.int -> !torch.int %int14336_2459 = torch.constant.int 14336 %2288 = torch.prim.ListConstruct %2287, %int14336_2459 : (!torch.int, !torch.int) -> !torch.list<int> %2289 = torch.aten.view %2284, %2288 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2289, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : 
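// NOTE: the FFN is SwiGLU: gate = x @ W_gate^T through silu, up = x @ W_up^T,
// elementwise product, then (in the mm that follows) the down-projection back
// to 4096. Tokens are flattened to [4*s, 4096] so each projection is a plain
// matmul. A minimal sketch:
//
//   def swiglu(x, w_gate, w_up, w_down):        # 4096 -> 14336 -> 4096
//       return (F.silu(x @ w_gate.T) * (x @ w_up.T)) @ w_down.T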
!torch.vtensor<[?,14336],f16> %2290 = torch.aten.mm %2289, %2285 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2290, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2460 = torch.constant.int 4 %int4096_2461 = torch.constant.int 4096 %2291 = torch.prim.ListConstruct %int4_2460, %2286, %int4096_2461 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2292 = torch.aten.view %2290, %2291 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2292, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_2462 = torch.constant.int 1 %2293 = torch.aten.add.Tensor %2259, %2292, %int1_2462 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2293, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_2463 = torch.constant.int 6 %2294 = torch.prims.convert_element_type %2293, %int6_2463 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2294, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_2464 = torch.constant.int 2 %2295 = torch.aten.pow.Tensor_Scalar %2294, %int2_2464 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2295, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_2465 = torch.constant.int -1 %2296 = torch.prim.ListConstruct %int-1_2465 : (!torch.int) -> !torch.list<int> %true_2466 = torch.constant.bool true %none_2467 = torch.constant.none %2297 = torch.aten.mean.dim %2295, %2296, %true_2466, %none_2467 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2297, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_2468 = torch.constant.float 9.9999997473787516E-6 %int1_2469 = torch.constant.int 1 %2298 = torch.aten.add.Scalar %2297, %float9.999990e-06_2468, %int1_2469 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2298, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2299 = torch.aten.rsqrt %2298 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2299, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2300 = torch.aten.mul.Tensor %2294, %2299 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2300, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2301 = torch.aten.mul.Tensor %91, %2300 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2301, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_2470 = torch.constant.int 5 %2302 = torch.prims.convert_element_type %2301, %int5_2470 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2302, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2471 = torch.constant.int -2 %int-1_2472 = torch.constant.int -1 %2303 = torch.aten.transpose.int %92, %int-2_2471, 
%int-1_2472 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_2473 = torch.constant.int 4 %2304 = torch.aten.mul.int %int4_2473, %294 : !torch.int, !torch.int -> !torch.int %int4096_2474 = torch.constant.int 4096 %2305 = torch.prim.ListConstruct %2304, %int4096_2474 : (!torch.int, !torch.int) -> !torch.list<int> %2306 = torch.aten.view %2302, %2305 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2306, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2307 = torch.aten.mm %2306, %2303 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2307, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2475 = torch.constant.int 4 %int4096_2476 = torch.constant.int 4096 %2308 = torch.prim.ListConstruct %int4_2475, %294, %int4096_2476 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2309 = torch.aten.view %2307, %2308 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2309, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2477 = torch.constant.int -2 %int-1_2478 = torch.constant.int -1 %2310 = torch.aten.transpose.int %93, %int-2_2477, %int-1_2478 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_2479 = torch.constant.int 4 %2311 = torch.aten.mul.int %int4_2479, %294 : !torch.int, !torch.int -> !torch.int %int4096_2480 = torch.constant.int 4096 %2312 = torch.prim.ListConstruct %2311, %int4096_2480 : (!torch.int, !torch.int) -> !torch.list<int> %2313 = torch.aten.view %2302, %2312 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2313, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2314 = torch.aten.mm %2313, %2310 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2314, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_2481 = torch.constant.int 4 %int1024_2482 = torch.constant.int 1024 %2315 = torch.prim.ListConstruct %int4_2481, %294, %int1024_2482 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2316 = torch.aten.view %2314, %2315 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %2316, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_2483 = torch.constant.int -2 %int-1_2484 = torch.constant.int -1 %2317 = torch.aten.transpose.int %94, %int-2_2483, %int-1_2484 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_2485 = torch.constant.int 4 %2318 = torch.aten.mul.int %int4_2485, %294 : !torch.int, !torch.int -> !torch.int %int4096_2486 = torch.constant.int 4096 %2319 = torch.prim.ListConstruct %2318, %int4096_2486 : (!torch.int, !torch.int) -> !torch.list<int> %2320 = torch.aten.view %2302, %2319 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2320, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2321 = torch.aten.mm %2320, %2317 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2321, [%292], 
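// NOTE: a new transformer block begins at the residual %2293 and the RMSNorm
// that produces %2302; the transpose + mm pairs above are its Q/K/V
// projections, each weight stored [out_features, in_features] in f16 and
// transposed before the matmul:
//
//   q = x @ w_q.T            # [4*s, 4096] @ [4096, 4096]
//   k = x @ w_k.T            # [4*s, 4096] @ [4096, 1024]
//   v = x @ w_v.T            # 1024 = 8 KV heads * head_dim 128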
affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_2487 = torch.constant.int 4 %int1024_2488 = torch.constant.int 1024 %2322 = torch.prim.ListConstruct %int4_2487, %294, %int1024_2488 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2323 = torch.aten.view %2321, %2322 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %2323, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_2489 = torch.constant.int 4 %int32_2490 = torch.constant.int 32 %int128_2491 = torch.constant.int 128 %2324 = torch.prim.ListConstruct %int4_2489, %294, %int32_2490, %int128_2491 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2325 = torch.aten.view %2309, %2324 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2325, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_2492 = torch.constant.int 4 %int8_2493 = torch.constant.int 8 %int128_2494 = torch.constant.int 128 %2326 = torch.prim.ListConstruct %int4_2492, %294, %int8_2493, %int128_2494 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2327 = torch.aten.view %2316, %2326 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2327, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_2495 = torch.constant.int 4 %int8_2496 = torch.constant.int 8 %int128_2497 = torch.constant.int 128 %2328 = torch.prim.ListConstruct %int4_2495, %294, %int8_2496, %int128_2497 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2329 = torch.aten.view %2323, %2328 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2329, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_2498 = torch.constant.int 131072 %none_2499 = torch.constant.none %none_2500 = torch.constant.none %cpu_2501 = torch.constant.device "cpu" %false_2502 = torch.constant.bool false %2330 = torch.aten.arange %int131072_2498, %none_2499, %none_2500, %cpu_2501, %false_2502 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_2503 = torch.constant.int 0 %int128_2504 = torch.constant.int 128 %int2_2505 = torch.constant.int 2 %none_2506 = torch.constant.none %none_2507 = torch.constant.none %cpu_2508 = torch.constant.device "cpu" %false_2509 = torch.constant.bool false %2331 = torch.aten.arange.start_step %int0_2503, %int128_2504, %int2_2505, %none_2506, %none_2507, %cpu_2508, %false_2509 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_2510 = torch.constant.int 0 %int0_2511 = torch.constant.int 0 %int64_2512 = torch.constant.int 64 %int1_2513 = torch.constant.int 1 %2332 = torch.aten.slice.Tensor %2331, %int0_2510, %int0_2511, %int64_2512, %int1_2513 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_2514 = torch.constant.int 6 %2333 = torch.prims.convert_element_type %2332, %int6_2514 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_2515 = torch.constant.int 128 %2334 = torch.aten.div.Scalar %2333, %int128_2515 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_2516 = 
torch.constant.float 5.000000e+05 %2335 = torch.aten.pow.Scalar %float5.000000e05_2516, %2334 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2336 = torch.aten.reciprocal %2335 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_2517 = torch.constant.float 1.000000e+00 %2337 = torch.aten.mul.Scalar %2336, %float1.000000e00_2517 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_2518 = torch.constant.int 131072 %int1_2519 = torch.constant.int 1 %2338 = torch.prim.ListConstruct %int131072_2518, %int1_2519 : (!torch.int, !torch.int) -> !torch.list<int> %2339 = torch.aten.view %2330, %2338 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %2340 = torch.aten.mul.Tensor %2339, %2337 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2341 = torch.aten.cos %2340 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2342 = torch.aten.sin %2340 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2343 = torch.aten.complex %2341, %2342 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_2520 = torch.constant.int 1 %2344 = torch.aten.size.int %2309, %int1_2520 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_2521 = torch.constant.int 0 %2345 = torch.aten.add.int %int0_2521, %2344 : !torch.int, !torch.int -> !torch.int %int0_2522 = torch.constant.int 0 %int0_2523 = torch.constant.int 0 %int1_2524 = torch.constant.int 1 %2346 = torch.aten.slice.Tensor %2343, %int0_2522, %int0_2523, %2345, %int1_2524 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2346, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_2525 = torch.constant.int 1 %int0_2526 = torch.constant.int 0 %int9223372036854775807_2527 = torch.constant.int 9223372036854775807 %int1_2528 = torch.constant.int 1 %2347 = torch.aten.slice.Tensor %2346, %int1_2525, %int0_2526, %int9223372036854775807_2527, %int1_2528 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2347, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_2529 = torch.constant.int 0 %2348 = torch.aten.unsqueeze %2347, %int0_2529 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2348, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_2530 = torch.constant.int 2 %2349 = torch.aten.unsqueeze %2348, %int2_2530 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2349, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_2531 = torch.constant.int 3 %int0_2532 = torch.constant.int 0 %int9223372036854775807_2533 = torch.constant.int 9223372036854775807 %int1_2534 = torch.constant.int 1 %2350 = torch.aten.slice.Tensor %2349, %int3_2531, %int0_2532, %int9223372036854775807_2533, %int1_2534 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2350, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : 
!torch.vtensor<[1,?,1,64],complex<f32>> %2351 = torch_c.to_builtin_tensor %2325 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_2535 = arith.constant 1 : index %dim_2536 = tensor.dim %2351, %c1_2535 : tensor<4x?x32x128xf16> %2352 = flow.tensor.bitcast %2351 : tensor<4x?x32x128xf16>{%dim_2536} -> tensor<4x?x32x64xcomplex<f16>>{%dim_2536} %2353 = torch_c.from_builtin_tensor %2352 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %2353, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %2354 = torch.aten.mul.Tensor %2353, %2350 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %2354, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %2355 = torch_c.to_builtin_tensor %2354 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_2537 = arith.constant 1 : index %dim_2538 = tensor.dim %2355, %c1_2537 : tensor<4x?x32x64xcomplex<f32>> %2356 = flow.tensor.bitcast %2355 : tensor<4x?x32x64xcomplex<f32>>{%dim_2538} -> tensor<4x?x32x128xf32>{%dim_2538} %2357 = torch_c.from_builtin_tensor %2356 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %2357, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_2539 = torch.constant.int 5 %2358 = torch.prims.convert_element_type %2357, %int5_2539 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2358, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_2540 = torch.constant.int 131072 %none_2541 = torch.constant.none %none_2542 = torch.constant.none %cpu_2543 = torch.constant.device "cpu" %false_2544 = torch.constant.bool false %2359 = torch.aten.arange %int131072_2540, %none_2541, %none_2542, %cpu_2543, %false_2544 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_2545 = torch.constant.int 0 %int128_2546 = torch.constant.int 128 %int2_2547 = torch.constant.int 2 %none_2548 = torch.constant.none %none_2549 = torch.constant.none %cpu_2550 = torch.constant.device "cpu" %false_2551 = torch.constant.bool false %2360 = torch.aten.arange.start_step %int0_2545, %int128_2546, %int2_2547, %none_2548, %none_2549, %cpu_2550, %false_2551 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_2552 = torch.constant.int 0 %int0_2553 = torch.constant.int 0 %int64_2554 = torch.constant.int 64 %int1_2555 = torch.constant.int 1 %2361 = torch.aten.slice.Tensor %2360, %int0_2552, %int0_2553, %int64_2554, %int1_2555 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_2556 = torch.constant.int 6 %2362 = torch.prims.convert_element_type %2361, %int6_2556 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_2557 = torch.constant.int 128 %2363 = torch.aten.div.Scalar %2362, %int128_2557 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_2558 = torch.constant.float 5.000000e+05 %2364 = torch.aten.pow.Scalar %float5.000000e05_2558, %2363 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2365 = torch.aten.reciprocal %2364 : !torch.vtensor<[64],f32> -> 
!torch.vtensor<[64],f32> %float1.000000e00_2559 = torch.constant.float 1.000000e+00 %2366 = torch.aten.mul.Scalar %2365, %float1.000000e00_2559 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_2560 = torch.constant.int 131072 %int1_2561 = torch.constant.int 1 %2367 = torch.prim.ListConstruct %int131072_2560, %int1_2561 : (!torch.int, !torch.int) -> !torch.list<int> %2368 = torch.aten.view %2359, %2367 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %2369 = torch.aten.mul.Tensor %2368, %2366 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2370 = torch.aten.cos %2369 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2371 = torch.aten.sin %2369 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2372 = torch.aten.complex %2370, %2371 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_2562 = torch.constant.int 1 %2373 = torch.aten.size.int %2316, %int1_2562 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_2563 = torch.constant.int 0 %2374 = torch.aten.add.int %int0_2563, %2373 : !torch.int, !torch.int -> !torch.int %int0_2564 = torch.constant.int 0 %int0_2565 = torch.constant.int 0 %int1_2566 = torch.constant.int 1 %2375 = torch.aten.slice.Tensor %2372, %int0_2564, %int0_2565, %2374, %int1_2566 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2375, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_2567 = torch.constant.int 1 %int0_2568 = torch.constant.int 0 %int9223372036854775807_2569 = torch.constant.int 9223372036854775807 %int1_2570 = torch.constant.int 1 %2376 = torch.aten.slice.Tensor %2375, %int1_2567, %int0_2568, %int9223372036854775807_2569, %int1_2570 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2376, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_2571 = torch.constant.int 0 %2377 = torch.aten.unsqueeze %2376, %int0_2571 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2377, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_2572 = torch.constant.int 2 %2378 = torch.aten.unsqueeze %2377, %int2_2572 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2378, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_2573 = torch.constant.int 3 %int0_2574 = torch.constant.int 0 %int9223372036854775807_2575 = torch.constant.int 9223372036854775807 %int1_2576 = torch.constant.int 1 %2379 = torch.aten.slice.Tensor %2378, %int3_2573, %int0_2574, %int9223372036854775807_2575, %int1_2576 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2379, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %2380 = torch_c.to_builtin_tensor %2327 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_2577 = arith.constant 1 : index %dim_2578 = tensor.dim %2380, %c1_2577 : tensor<4x?x8x128xf16> %2381 = 
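// NOTE: the full 131072-position rotary table is rebuilt from scratch for Q
// and again for K in every block rather than hoisted once; presumably later
// compilation stages are expected to CSE these identical subgraphs.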
flow.tensor.bitcast %2380 : tensor<4x?x8x128xf16>{%dim_2578} -> tensor<4x?x8x64xcomplex<f16>>{%dim_2578} %2382 = torch_c.from_builtin_tensor %2381 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %2382, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %2383 = torch.aten.mul.Tensor %2382, %2379 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %2383, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %2384 = torch_c.to_builtin_tensor %2383 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_2579 = arith.constant 1 : index %dim_2580 = tensor.dim %2384, %c1_2579 : tensor<4x?x8x64xcomplex<f32>> %2385 = flow.tensor.bitcast %2384 : tensor<4x?x8x64xcomplex<f32>>{%dim_2580} -> tensor<4x?x8x128xf32>{%dim_2580} %2386 = torch_c.from_builtin_tensor %2385 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %2386, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_2581 = torch.constant.int 5 %2387 = torch.prims.convert_element_type %2386, %int5_2581 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2387, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_2582 = torch.constant.int 64 %2388 = torch.aten.mul.Scalar %arg2, %int64_2582 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2388, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int20 = torch.constant.int 20 %int1_2583 = torch.constant.int 1 %2389 = torch.aten.add.Scalar %2388, %int20, %int1_2583 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2389, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_2584 = torch.constant.int 4 %int32_2585 = torch.constant.int 32 %int8_2586 = torch.constant.int 8 %int128_2587 = torch.constant.int 128 %2390 = torch.prim.ListConstruct %int4_2584, %425, %int32_2585, %int8_2586, %int128_2587 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2391 = torch.aten.view %2387, %2390 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2391, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_2588 = torch.constant.int 4 %2392 = torch.aten.mul.int %int4_2588, %425 : !torch.int, !torch.int -> !torch.int %int32_2589 = torch.constant.int 32 %int8_2590 = torch.constant.int 8 %int128_2591 = torch.constant.int 128 %2393 = torch.prim.ListConstruct %2392, %int32_2589, %int8_2590, %int128_2591 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2394 = torch.aten.view %2391, %2393 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2394, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_2592 = torch.constant.int 4 %2395 = torch.aten.mul.int %int4_2592, %425 : !torch.int, !torch.int -> !torch.int %2396 = torch.prim.ListConstruct %2395 : (!torch.int) -> !torch.list<int> %2397 = torch.aten.view %2389, %2396 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> 
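// NOTE: the cache-slot arithmetic repeats with offset 20 instead of 18;
// under the layout assumed earlier, 20 = 2*10 selects the K plane of layer
// 10, with V again at slot + 1:
//
//   k_slot = page_ids * 64 + 2 * 10
//   v_slot = k_slot + 1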
torch.bind_symbolic_shape %2397, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_2593 = torch.constant.int 32 %int2_2594 = torch.constant.int 2 %int32_2595 = torch.constant.int 32 %int8_2596 = torch.constant.int 8 %int128_2597 = torch.constant.int 128 %2398 = torch.prim.ListConstruct %416, %int32_2593, %int2_2594, %int32_2595, %int8_2596, %int128_2597 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2399 = torch.aten.view %2231, %2398 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2399, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_2598 = torch.constant.int 32 %2400 = torch.aten.mul.int %416, %int32_2598 : !torch.int, !torch.int -> !torch.int %int2_2599 = torch.constant.int 2 %2401 = torch.aten.mul.int %2400, %int2_2599 : !torch.int, !torch.int -> !torch.int %int32_2600 = torch.constant.int 32 %int8_2601 = torch.constant.int 8 %int128_2602 = torch.constant.int 128 %2402 = torch.prim.ListConstruct %2401, %int32_2600, %int8_2601, %int128_2602 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2403 = torch.aten.view %2399, %2402 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2403, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %2404 = torch.prim.ListConstruct %2397 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_2603 = torch.constant.bool false %2405 = torch.aten.index_put %2403, %2404, %2394, %false_2603 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2405, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_2604 = torch.constant.int 32 %int2_2605 = torch.constant.int 2 %int32_2606 = torch.constant.int 32 %int8_2607 = torch.constant.int 8 %int128_2608 = torch.constant.int 128 %2406 = torch.prim.ListConstruct %416, %int32_2604, %int2_2605, %int32_2606, %int8_2607, %int128_2608 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2407 = torch.aten.view %2405, %2406 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2407, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_2609 = torch.constant.int 2097152 %2408 = torch.prim.ListConstruct %416, %int2097152_2609 : (!torch.int, !torch.int) -> !torch.list<int> %2409 = torch.aten.view %2407, %2408 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2409, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_2610 = torch.constant.int 32 %int2_2611 = torch.constant.int 2 %int32_2612 = torch.constant.int 32 %int8_2613 = torch.constant.int 8 %int128_2614 = torch.constant.int 128 %2410 = torch.prim.ListConstruct %416, %int32_2610, %int2_2611, %int32_2612, %int8_2613, %int128_2614 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2411 = torch.aten.view %2409, %2410 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2411, [%293], affine_map<()[s0] -> (s0, 32, 2, 
32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_2615 = torch.constant.int 32 %int8_2616 = torch.constant.int 8 %int128_2617 = torch.constant.int 128 %2412 = torch.prim.ListConstruct %2401, %int32_2615, %int8_2616, %int128_2617 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2413 = torch.aten.view %2411, %2412 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2413, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_2618 = torch.constant.int 4 %int32_2619 = torch.constant.int 32 %int8_2620 = torch.constant.int 8 %int128_2621 = torch.constant.int 128 %2414 = torch.prim.ListConstruct %int4_2618, %425, %int32_2619, %int8_2620, %int128_2621 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2415 = torch.aten.view %2329, %2414 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2415, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_2622 = torch.constant.int 4 %2416 = torch.aten.mul.int %int4_2622, %425 : !torch.int, !torch.int -> !torch.int %int32_2623 = torch.constant.int 32 %int8_2624 = torch.constant.int 8 %int128_2625 = torch.constant.int 128 %2417 = torch.prim.ListConstruct %2416, %int32_2623, %int8_2624, %int128_2625 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2418 = torch.aten.view %2415, %2417 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2418, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_2626 = torch.constant.int 1 %int1_2627 = torch.constant.int 1 %2419 = torch.aten.add.Scalar %2389, %int1_2626, %int1_2627 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2419, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_2628 = torch.constant.int 4 %2420 = torch.aten.mul.int %int4_2628, %425 : !torch.int, !torch.int -> !torch.int %2421 = torch.prim.ListConstruct %2420 : (!torch.int) -> !torch.list<int> %2422 = torch.aten.view %2419, %2421 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2422, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %2423 = torch.prim.ListConstruct %2422 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_2629 = torch.constant.bool false %2424 = torch.aten.index_put %2413, %2423, %2418, %false_2629 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2424, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_2630 = torch.constant.int 32 %int2_2631 = torch.constant.int 2 %int32_2632 = torch.constant.int 32 %int8_2633 = torch.constant.int 8 %int128_2634 = torch.constant.int 128 %2425 = torch.prim.ListConstruct %416, %int32_2630, %int2_2631, %int32_2632, %int8_2633, %int128_2634 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2426 = torch.aten.view %2424, %2425 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2426, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : 
!torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_2635 = torch.constant.int 2097152 %2427 = torch.prim.ListConstruct %416, %int2097152_2635 : (!torch.int, !torch.int) -> !torch.list<int> %2428 = torch.aten.view %2426, %2427 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2428, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_2636 = torch.constant.int -2 %2429 = torch.aten.unsqueeze %2387, %int-2_2636 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2429, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_2637 = torch.constant.int 4 %int8_2638 = torch.constant.int 8 %int4_2639 = torch.constant.int 4 %int128_2640 = torch.constant.int 128 %2430 = torch.prim.ListConstruct %int4_2637, %2373, %int8_2638, %int4_2639, %int128_2640 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_2641 = torch.constant.bool false %2431 = torch.aten.expand %2429, %2430, %false_2641 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2431, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_2642 = torch.constant.int 0 %2432 = torch.aten.clone %2431, %int0_2642 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2432, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_2643 = torch.constant.int 4 %int32_2644 = torch.constant.int 32 %int128_2645 = torch.constant.int 128 %2433 = torch.prim.ListConstruct %int4_2643, %2373, %int32_2644, %int128_2645 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2434 = torch.aten._unsafe_view %2432, %2433 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2434, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_2646 = torch.constant.int -2 %2435 = torch.aten.unsqueeze %2329, %int-2_2646 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2435, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_2647 = torch.constant.int 1 %2436 = torch.aten.size.int %2323, %int1_2647 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_2648 = torch.constant.int 4 %int8_2649 = torch.constant.int 8 %int4_2650 = torch.constant.int 4 %int128_2651 = torch.constant.int 128 %2437 = torch.prim.ListConstruct %int4_2648, %2436, %int8_2649, %int4_2650, %int128_2651 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_2652 = torch.constant.bool false %2438 = torch.aten.expand %2435, %2437, %false_2652 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2438, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_2653 = torch.constant.int 0 %2439 = torch.aten.clone %2438, %int0_2653 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2439, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_2654 = torch.constant.int 4 %int32_2655 = 
torch.constant.int 32 %int128_2656 = torch.constant.int 128 %2440 = torch.prim.ListConstruct %int4_2654, %2436, %int32_2655, %int128_2656 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2441 = torch.aten._unsafe_view %2439, %2440 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2441, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_2657 = torch.constant.int 1 %int2_2658 = torch.constant.int 2 %2442 = torch.aten.transpose.int %2358, %int1_2657, %int2_2658 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2442, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2659 = torch.constant.int 1 %int2_2660 = torch.constant.int 2 %2443 = torch.aten.transpose.int %2434, %int1_2659, %int2_2660 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2443, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2661 = torch.constant.int 1 %int2_2662 = torch.constant.int 2 %2444 = torch.aten.transpose.int %2441, %int1_2661, %int2_2662 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2444, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_2663 = torch.constant.float 0.000000e+00 %false_2664 = torch.constant.bool false %none_2665 = torch.constant.none %2445:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%2442, %2443, %2444, %float0.000000e00_2663, %false_2664, %320, %none_2665) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %2445#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2666 = torch.constant.int 1 %int2_2667 = torch.constant.int 2 %2446 = torch.aten.transpose.int %2445#0, %int1_2666, %int2_2667 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2446, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_2668 = torch.constant.int 4 %int4096_2669 = torch.constant.int 4096 %2447 = torch.prim.ListConstruct %int4_2668, %2344, %int4096_2669 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2448 = torch.aten.view %2446, %2447 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2448, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2670 = torch.constant.int -2 %int-1_2671 = torch.constant.int -1 %2449 = torch.aten.transpose.int %95, %int-2_2670, %int-1_2671 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_2672 = torch.constant.int 4 %2450 = torch.aten.mul.int %int4_2672, %2344 : !torch.int, !torch.int -> !torch.int %int4096_2673 = torch.constant.int 4096 %2451 = torch.prim.ListConstruct %2450, %int4096_2673 : (!torch.int, !torch.int) -> !torch.list<int> %2452 = torch.aten.view %2448, %2451 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> 
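// Attention epilogue: flatten the SDPA result to [batch*seq, 4096], multiply by
// the transposed 4096x4096 output-projection weight (%2449), reshape back to
// [4, seq, 4096], and add the residual (%2293). The f32 pow(2)/mean/+eps/rsqrt
// sequence that follows is an RMSNorm, scaled by the [4096] norm weight (%96,
// presumably this block's ffn_norm).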
torch.bind_symbolic_shape %2452, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2453 = torch.aten.mm %2452, %2449 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2453, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2674 = torch.constant.int 4 %int4096_2675 = torch.constant.int 4096 %2454 = torch.prim.ListConstruct %int4_2674, %2344, %int4096_2675 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2455 = torch.aten.view %2453, %2454 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2455, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_2676 = torch.constant.int 1 %2456 = torch.aten.add.Tensor %2293, %2455, %int1_2676 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2456, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_2677 = torch.constant.int 6 %2457 = torch.prims.convert_element_type %2456, %int6_2677 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2457, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_2678 = torch.constant.int 2 %2458 = torch.aten.pow.Tensor_Scalar %2457, %int2_2678 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2458, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_2679 = torch.constant.int -1 %2459 = torch.prim.ListConstruct %int-1_2679 : (!torch.int) -> !torch.list<int> %true_2680 = torch.constant.bool true %none_2681 = torch.constant.none %2460 = torch.aten.mean.dim %2458, %2459, %true_2680, %none_2681 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2460, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_2682 = torch.constant.float 9.9999997473787516E-6 %int1_2683 = torch.constant.int 1 %2461 = torch.aten.add.Scalar %2460, %float9.999990e-06_2682, %int1_2683 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2461, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2462 = torch.aten.rsqrt %2461 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2462, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2463 = torch.aten.mul.Tensor %2457, %2462 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2463, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2464 = torch.aten.mul.Tensor %96, %2463 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2464, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_2684 = torch.constant.int 5 %2465 = torch.prims.convert_element_type %2464, %int5_2684 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2465, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2685 = torch.constant.int -2 %int-1_2686 
= torch.constant.int -1 %2466 = torch.aten.transpose.int %97, %int-2_2685, %int-1_2686 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_2687 = torch.constant.int 4 %2467 = torch.aten.mul.int %int4_2687, %294 : !torch.int, !torch.int -> !torch.int %int4096_2688 = torch.constant.int 4096 %2468 = torch.prim.ListConstruct %2467, %int4096_2688 : (!torch.int, !torch.int) -> !torch.list<int> %2469 = torch.aten.view %2465, %2468 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2469, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2470 = torch.aten.mm %2469, %2466 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2470, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_2689 = torch.constant.int 4 %int14336_2690 = torch.constant.int 14336 %2471 = torch.prim.ListConstruct %int4_2689, %294, %int14336_2690 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2472 = torch.aten.view %2470, %2471 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2472, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2473 = torch.aten.silu %2472 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2473, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_2691 = torch.constant.int -2 %int-1_2692 = torch.constant.int -1 %2474 = torch.aten.transpose.int %98, %int-2_2691, %int-1_2692 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_2693 = torch.constant.int 4 %2475 = torch.aten.mul.int %int4_2693, %294 : !torch.int, !torch.int -> !torch.int %int4096_2694 = torch.constant.int 4096 %2476 = torch.prim.ListConstruct %2475, %int4096_2694 : (!torch.int, !torch.int) -> !torch.list<int> %2477 = torch.aten.view %2465, %2476 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2477, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2478 = torch.aten.mm %2477, %2474 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2478, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_2695 = torch.constant.int 4 %int14336_2696 = torch.constant.int 14336 %2479 = torch.prim.ListConstruct %int4_2695, %294, %int14336_2696 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2480 = torch.aten.view %2478, %2479 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2480, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2481 = torch.aten.mul.Tensor %2473, %2480 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2481, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_2697 = torch.constant.int -2 %int-1_2698 = torch.constant.int -1 %2482 = torch.aten.transpose.int %99, %int-2_2697, %int-1_2698 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_2699 = torch.constant.int 1 %2483 = torch.aten.size.int %2472, %int1_2699 : 
!torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_2700 = torch.constant.int 4 %2484 = torch.aten.mul.int %int4_2700, %2483 : !torch.int, !torch.int -> !torch.int %int14336_2701 = torch.constant.int 14336 %2485 = torch.prim.ListConstruct %2484, %int14336_2701 : (!torch.int, !torch.int) -> !torch.list<int> %2486 = torch.aten.view %2481, %2485 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2486, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %2487 = torch.aten.mm %2486, %2482 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2487, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2702 = torch.constant.int 4 %int4096_2703 = torch.constant.int 4096 %2488 = torch.prim.ListConstruct %int4_2702, %2483, %int4096_2703 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2489 = torch.aten.view %2487, %2488 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2489, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_2704 = torch.constant.int 1 %2490 = torch.aten.add.Tensor %2456, %2489, %int1_2704 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2490, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_2705 = torch.constant.int 6 %2491 = torch.prims.convert_element_type %2490, %int6_2705 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2491, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_2706 = torch.constant.int 2 %2492 = torch.aten.pow.Tensor_Scalar %2491, %int2_2706 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2492, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_2707 = torch.constant.int -1 %2493 = torch.prim.ListConstruct %int-1_2707 : (!torch.int) -> !torch.list<int> %true_2708 = torch.constant.bool true %none_2709 = torch.constant.none %2494 = torch.aten.mean.dim %2492, %2493, %true_2708, %none_2709 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2494, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_2710 = torch.constant.float 9.9999997473787516E-6 %int1_2711 = torch.constant.int 1 %2495 = torch.aten.add.Scalar %2494, %float9.999990e-06_2710, %int1_2711 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2495, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2496 = torch.aten.rsqrt %2495 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2496, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2497 = torch.aten.mul.Tensor %2491, %2496 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2497, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2498 = torch.aten.mul.Tensor %100, %2497 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> 
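// The FFN residual (%2490) plus the RMSNorm above close out one transformer
// block. Next come the following block's projections: Q via %101 (4096x4096),
// K via %102 and V via %103 (both 1024x4096) -- 32 query heads vs. 8 KV heads
// of dim 128, i.e. grouped-query attention.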
torch.bind_symbolic_shape %2498, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_2712 = torch.constant.int 5 %2499 = torch.prims.convert_element_type %2498, %int5_2712 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2499, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2713 = torch.constant.int -2 %int-1_2714 = torch.constant.int -1 %2500 = torch.aten.transpose.int %101, %int-2_2713, %int-1_2714 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_2715 = torch.constant.int 4 %2501 = torch.aten.mul.int %int4_2715, %294 : !torch.int, !torch.int -> !torch.int %int4096_2716 = torch.constant.int 4096 %2502 = torch.prim.ListConstruct %2501, %int4096_2716 : (!torch.int, !torch.int) -> !torch.list<int> %2503 = torch.aten.view %2499, %2502 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2503, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2504 = torch.aten.mm %2503, %2500 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2504, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2717 = torch.constant.int 4 %int4096_2718 = torch.constant.int 4096 %2505 = torch.prim.ListConstruct %int4_2717, %294, %int4096_2718 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2506 = torch.aten.view %2504, %2505 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2506, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2719 = torch.constant.int -2 %int-1_2720 = torch.constant.int -1 %2507 = torch.aten.transpose.int %102, %int-2_2719, %int-1_2720 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_2721 = torch.constant.int 4 %2508 = torch.aten.mul.int %int4_2721, %294 : !torch.int, !torch.int -> !torch.int %int4096_2722 = torch.constant.int 4096 %2509 = torch.prim.ListConstruct %2508, %int4096_2722 : (!torch.int, !torch.int) -> !torch.list<int> %2510 = torch.aten.view %2499, %2509 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2510, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2511 = torch.aten.mm %2510, %2507 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2511, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_2723 = torch.constant.int 4 %int1024_2724 = torch.constant.int 1024 %2512 = torch.prim.ListConstruct %int4_2723, %294, %int1024_2724 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2513 = torch.aten.view %2511, %2512 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %2513, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_2725 = torch.constant.int -2 %int-1_2726 = torch.constant.int -1 %2514 = torch.aten.transpose.int %103, %int-2_2725, %int-1_2726 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_2727 = torch.constant.int 4 %2515 = torch.aten.mul.int %int4_2727, %294 : !torch.int, !torch.int -> !torch.int %int4096_2728 = 
torch.constant.int 4096 %2516 = torch.prim.ListConstruct %2515, %int4096_2728 : (!torch.int, !torch.int) -> !torch.list<int> %2517 = torch.aten.view %2499, %2516 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2517, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2518 = torch.aten.mm %2517, %2514 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2518, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_2729 = torch.constant.int 4 %int1024_2730 = torch.constant.int 1024 %2519 = torch.prim.ListConstruct %int4_2729, %294, %int1024_2730 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2520 = torch.aten.view %2518, %2519 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %2520, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_2731 = torch.constant.int 4 %int32_2732 = torch.constant.int 32 %int128_2733 = torch.constant.int 128 %2521 = torch.prim.ListConstruct %int4_2731, %294, %int32_2732, %int128_2733 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2522 = torch.aten.view %2506, %2521 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2522, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_2734 = torch.constant.int 4 %int8_2735 = torch.constant.int 8 %int128_2736 = torch.constant.int 128 %2523 = torch.prim.ListConstruct %int4_2734, %294, %int8_2735, %int128_2736 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2524 = torch.aten.view %2513, %2523 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2524, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_2737 = torch.constant.int 4 %int8_2738 = torch.constant.int 8 %int128_2739 = torch.constant.int 128 %2525 = torch.prim.ListConstruct %int4_2737, %294, %int8_2738, %int128_2739 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2526 = torch.aten.view %2520, %2525 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2526, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_2740 = torch.constant.int 131072 %none_2741 = torch.constant.none %none_2742 = torch.constant.none %cpu_2743 = torch.constant.device "cpu" %false_2744 = torch.constant.bool false %2527 = torch.aten.arange %int131072_2740, %none_2741, %none_2742, %cpu_2743, %false_2744 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_2745 = torch.constant.int 0 %int128_2746 = torch.constant.int 128 %int2_2747 = torch.constant.int 2 %none_2748 = torch.constant.none %none_2749 = torch.constant.none %cpu_2750 = torch.constant.device "cpu" %false_2751 = torch.constant.bool false %2528 = torch.aten.arange.start_step %int0_2745, %int128_2746, %int2_2747, %none_2748, %none_2749, %cpu_2750, %false_2751 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_2752 = torch.constant.int 0 %int0_2753 = torch.constant.int 0 %int64_2754 = torch.constant.int 64 %int1_2755 = torch.constant.int 1 %2529 = 
torch.aten.slice.Tensor %2528, %int0_2752, %int0_2753, %int64_2754, %int1_2755 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_2756 = torch.constant.int 6 %2530 = torch.prims.convert_element_type %2529, %int6_2756 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_2757 = torch.constant.int 128 %2531 = torch.aten.div.Scalar %2530, %int128_2757 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_2758 = torch.constant.float 5.000000e+05 %2532 = torch.aten.pow.Scalar %float5.000000e05_2758, %2531 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2533 = torch.aten.reciprocal %2532 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_2759 = torch.constant.float 1.000000e+00 %2534 = torch.aten.mul.Scalar %2533, %float1.000000e00_2759 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_2760 = torch.constant.int 131072 %int1_2761 = torch.constant.int 1 %2535 = torch.prim.ListConstruct %int131072_2760, %int1_2761 : (!torch.int, !torch.int) -> !torch.list<int> %2536 = torch.aten.view %2527, %2535 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %2537 = torch.aten.mul.Tensor %2536, %2534 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2538 = torch.aten.cos %2537 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2539 = torch.aten.sin %2537 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2540 = torch.aten.complex %2538, %2539 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_2762 = torch.constant.int 1 %2541 = torch.aten.size.int %2506, %int1_2762 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_2763 = torch.constant.int 0 %2542 = torch.aten.add.int %int0_2763, %2541 : !torch.int, !torch.int -> !torch.int %int0_2764 = torch.constant.int 0 %int0_2765 = torch.constant.int 0 %int1_2766 = torch.constant.int 1 %2543 = torch.aten.slice.Tensor %2540, %int0_2764, %int0_2765, %2542, %int1_2766 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2543, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_2767 = torch.constant.int 1 %int0_2768 = torch.constant.int 0 %int9223372036854775807_2769 = torch.constant.int 9223372036854775807 %int1_2770 = torch.constant.int 1 %2544 = torch.aten.slice.Tensor %2543, %int1_2767, %int0_2768, %int9223372036854775807_2769, %int1_2770 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2544, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_2771 = torch.constant.int 0 %2545 = torch.aten.unsqueeze %2544, %int0_2771 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2545, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_2772 = torch.constant.int 2 %2546 = torch.aten.unsqueeze %2545, %int2_2772 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2546, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> 
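// Rotary embedding table: inv_freq = 1 / 500000^(2i/128) for i in [0, 64),
// outer product with positions 0..131071, packed as cos + i*sin into
// complex<f32>, then sliced to the sequence length and broadcast-shaped to
// [1, seq, 1, 64]. Q is next bitcast from 128 f16 lanes to 64 complex<f16>
// pairs and multiplied by this table to apply the rotation.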
%int3_2773 = torch.constant.int 3 %int0_2774 = torch.constant.int 0 %int9223372036854775807_2775 = torch.constant.int 9223372036854775807 %int1_2776 = torch.constant.int 1 %2547 = torch.aten.slice.Tensor %2546, %int3_2773, %int0_2774, %int9223372036854775807_2775, %int1_2776 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2547, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %2548 = torch_c.to_builtin_tensor %2522 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_2777 = arith.constant 1 : index %dim_2778 = tensor.dim %2548, %c1_2777 : tensor<4x?x32x128xf16> %2549 = flow.tensor.bitcast %2548 : tensor<4x?x32x128xf16>{%dim_2778} -> tensor<4x?x32x64xcomplex<f16>>{%dim_2778} %2550 = torch_c.from_builtin_tensor %2549 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %2550, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %2551 = torch.aten.mul.Tensor %2550, %2547 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %2551, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %2552 = torch_c.to_builtin_tensor %2551 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_2779 = arith.constant 1 : index %dim_2780 = tensor.dim %2552, %c1_2779 : tensor<4x?x32x64xcomplex<f32>> %2553 = flow.tensor.bitcast %2552 : tensor<4x?x32x64xcomplex<f32>>{%dim_2780} -> tensor<4x?x32x128xf32>{%dim_2780} %2554 = torch_c.from_builtin_tensor %2553 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %2554, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_2781 = torch.constant.int 5 %2555 = torch.prims.convert_element_type %2554, %int5_2781 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2555, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_2782 = torch.constant.int 131072 %none_2783 = torch.constant.none %none_2784 = torch.constant.none %cpu_2785 = torch.constant.device "cpu" %false_2786 = torch.constant.bool false %2556 = torch.aten.arange %int131072_2782, %none_2783, %none_2784, %cpu_2785, %false_2786 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_2787 = torch.constant.int 0 %int128_2788 = torch.constant.int 128 %int2_2789 = torch.constant.int 2 %none_2790 = torch.constant.none %none_2791 = torch.constant.none %cpu_2792 = torch.constant.device "cpu" %false_2793 = torch.constant.bool false %2557 = torch.aten.arange.start_step %int0_2787, %int128_2788, %int2_2789, %none_2790, %none_2791, %cpu_2792, %false_2793 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_2794 = torch.constant.int 0 %int0_2795 = torch.constant.int 0 %int64_2796 = torch.constant.int 64 %int1_2797 = torch.constant.int 1 %2558 = torch.aten.slice.Tensor %2557, %int0_2794, %int0_2795, %int64_2796, %int1_2797 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_2798 = torch.constant.int 6 %2559 = torch.prims.convert_element_type %2558, %int6_2798 : 
!torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_2799 = torch.constant.int 128 %2560 = torch.aten.div.Scalar %2559, %int128_2799 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_2800 = torch.constant.float 5.000000e+05 %2561 = torch.aten.pow.Scalar %float5.000000e05_2800, %2560 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2562 = torch.aten.reciprocal %2561 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_2801 = torch.constant.float 1.000000e+00 %2563 = torch.aten.mul.Scalar %2562, %float1.000000e00_2801 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_2802 = torch.constant.int 131072 %int1_2803 = torch.constant.int 1 %2564 = torch.prim.ListConstruct %int131072_2802, %int1_2803 : (!torch.int, !torch.int) -> !torch.list<int> %2565 = torch.aten.view %2556, %2564 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %2566 = torch.aten.mul.Tensor %2565, %2563 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2567 = torch.aten.cos %2566 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2568 = torch.aten.sin %2566 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2569 = torch.aten.complex %2567, %2568 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_2804 = torch.constant.int 1 %2570 = torch.aten.size.int %2513, %int1_2804 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_2805 = torch.constant.int 0 %2571 = torch.aten.add.int %int0_2805, %2570 : !torch.int, !torch.int -> !torch.int %int0_2806 = torch.constant.int 0 %int0_2807 = torch.constant.int 0 %int1_2808 = torch.constant.int 1 %2572 = torch.aten.slice.Tensor %2569, %int0_2806, %int0_2807, %2571, %int1_2808 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2572, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_2809 = torch.constant.int 1 %int0_2810 = torch.constant.int 0 %int9223372036854775807_2811 = torch.constant.int 9223372036854775807 %int1_2812 = torch.constant.int 1 %2573 = torch.aten.slice.Tensor %2572, %int1_2809, %int0_2810, %int9223372036854775807_2811, %int1_2812 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2573, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_2813 = torch.constant.int 0 %2574 = torch.aten.unsqueeze %2573, %int0_2813 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2574, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_2814 = torch.constant.int 2 %2575 = torch.aten.unsqueeze %2574, %int2_2814 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2575, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_2815 = torch.constant.int 3 %int0_2816 = torch.constant.int 0 %int9223372036854775807_2817 = torch.constant.int 9223372036854775807 %int1_2818 = torch.constant.int 1 %2576 = torch.aten.slice.Tensor %2575, %int3_2815, %int0_2816, %int9223372036854775807_2817, %int1_2818 : 
!torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2576, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %2577 = torch_c.to_builtin_tensor %2524 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_2819 = arith.constant 1 : index %dim_2820 = tensor.dim %2577, %c1_2819 : tensor<4x?x8x128xf16> %2578 = flow.tensor.bitcast %2577 : tensor<4x?x8x128xf16>{%dim_2820} -> tensor<4x?x8x64xcomplex<f16>>{%dim_2820} %2579 = torch_c.from_builtin_tensor %2578 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %2579, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %2580 = torch.aten.mul.Tensor %2579, %2576 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %2580, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %2581 = torch_c.to_builtin_tensor %2580 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_2821 = arith.constant 1 : index %dim_2822 = tensor.dim %2581, %c1_2821 : tensor<4x?x8x64xcomplex<f32>> %2582 = flow.tensor.bitcast %2581 : tensor<4x?x8x64xcomplex<f32>>{%dim_2822} -> tensor<4x?x8x128xf32>{%dim_2822} %2583 = torch_c.from_builtin_tensor %2582 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %2583, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_2823 = torch.constant.int 5 %2584 = torch.prims.convert_element_type %2583, %int5_2823 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2584, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_2824 = torch.constant.int 64 %2585 = torch.aten.mul.Scalar %arg2, %int64_2824 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2585, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int22 = torch.constant.int 22 %int1_2825 = torch.constant.int 1 %2586 = torch.aten.add.Scalar %2585, %int22, %int1_2825 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2586, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_2826 = torch.constant.int 4 %int32_2827 = torch.constant.int 32 %int8_2828 = torch.constant.int 8 %int128_2829 = torch.constant.int 128 %2587 = torch.prim.ListConstruct %int4_2826, %425, %int32_2827, %int8_2828, %int128_2829 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2588 = torch.aten.view %2584, %2587 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2588, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_2830 = torch.constant.int 4 %2589 = torch.aten.mul.int %int4_2830, %425 : !torch.int, !torch.int -> !torch.int %int32_2831 = torch.constant.int 32 %int8_2832 = torch.constant.int 8 %int128_2833 = torch.constant.int 128 %2590 = torch.prim.ListConstruct %2589, %int32_2831, %int8_2832, %int128_2833 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2591 = torch.aten.view %2588, %2590 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> 
!torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2591, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_2834 = torch.constant.int 4 %2592 = torch.aten.mul.int %int4_2834, %425 : !torch.int, !torch.int -> !torch.int %2593 = torch.prim.ListConstruct %2592 : (!torch.int) -> !torch.list<int> %2594 = torch.aten.view %2586, %2593 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2594, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_2835 = torch.constant.int 32 %int2_2836 = torch.constant.int 2 %int32_2837 = torch.constant.int 32 %int8_2838 = torch.constant.int 8 %int128_2839 = torch.constant.int 128 %2595 = torch.prim.ListConstruct %416, %int32_2835, %int2_2836, %int32_2837, %int8_2838, %int128_2839 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2596 = torch.aten.view %2428, %2595 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2596, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_2840 = torch.constant.int 32 %2597 = torch.aten.mul.int %416, %int32_2840 : !torch.int, !torch.int -> !torch.int %int2_2841 = torch.constant.int 2 %2598 = torch.aten.mul.int %2597, %int2_2841 : !torch.int, !torch.int -> !torch.int %int32_2842 = torch.constant.int 32 %int8_2843 = torch.constant.int 8 %int128_2844 = torch.constant.int 128 %2599 = torch.prim.ListConstruct %2598, %int32_2842, %int8_2843, %int128_2844 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2600 = torch.aten.view %2596, %2599 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2600, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %2601 = torch.prim.ListConstruct %2594 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_2845 = torch.constant.bool false %2602 = torch.aten.index_put %2600, %2601, %2591, %false_2845 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2602, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_2846 = torch.constant.int 32 %int2_2847 = torch.constant.int 2 %int32_2848 = torch.constant.int 32 %int8_2849 = torch.constant.int 8 %int128_2850 = torch.constant.int 128 %2603 = torch.prim.ListConstruct %416, %int32_2846, %int2_2847, %int32_2848, %int8_2849, %int128_2850 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2604 = torch.aten.view %2602, %2603 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2604, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_2851 = torch.constant.int 2097152 %2605 = torch.prim.ListConstruct %416, %int2097152_2851 : (!torch.int, !torch.int) -> !torch.list<int> %2606 = torch.aten.view %2604, %2605 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2606, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_2852 = torch.constant.int 32 %int2_2853 = torch.constant.int 2 %int32_2854 = torch.constant.int 32 %int8_2855 = 
torch.constant.int 8 %int128_2856 = torch.constant.int 128 %2607 = torch.prim.ListConstruct %416, %int32_2852, %int2_2853, %int32_2854, %int8_2855, %int128_2856 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2608 = torch.aten.view %2606, %2607 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2608, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_2857 = torch.constant.int 32 %int8_2858 = torch.constant.int 8 %int128_2859 = torch.constant.int 128 %2609 = torch.prim.ListConstruct %2598, %int32_2857, %int8_2858, %int128_2859 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2610 = torch.aten.view %2608, %2609 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2610, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_2860 = torch.constant.int 4 %int32_2861 = torch.constant.int 32 %int8_2862 = torch.constant.int 8 %int128_2863 = torch.constant.int 128 %2611 = torch.prim.ListConstruct %int4_2860, %425, %int32_2861, %int8_2862, %int128_2863 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2612 = torch.aten.view %2526, %2611 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2612, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_2864 = torch.constant.int 4 %2613 = torch.aten.mul.int %int4_2864, %425 : !torch.int, !torch.int -> !torch.int %int32_2865 = torch.constant.int 32 %int8_2866 = torch.constant.int 8 %int128_2867 = torch.constant.int 128 %2614 = torch.prim.ListConstruct %2613, %int32_2865, %int8_2866, %int128_2867 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2615 = torch.aten.view %2612, %2614 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2615, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_2868 = torch.constant.int 1 %int1_2869 = torch.constant.int 1 %2616 = torch.aten.add.Scalar %2586, %int1_2868, %int1_2869 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2616, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_2870 = torch.constant.int 4 %2617 = torch.aten.mul.int %int4_2870, %425 : !torch.int, !torch.int -> !torch.int %2618 = torch.prim.ListConstruct %2617 : (!torch.int) -> !torch.list<int> %2619 = torch.aten.view %2616, %2618 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2619, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %2620 = torch.prim.ListConstruct %2619 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_2871 = torch.constant.bool false %2621 = torch.aten.index_put %2610, %2620, %2615, %false_2871 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2621, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_2872 = torch.constant.int 32 %int2_2873 = torch.constant.int 2 %int32_2874 = torch.constant.int 32 %int8_2875 = torch.constant.int 8 %int128_2876 = 
torch.constant.int 128 %2622 = torch.prim.ListConstruct %416, %int32_2872, %int2_2873, %int32_2874, %int8_2875, %int128_2876 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2623 = torch.aten.view %2621, %2622 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2623, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_2877 = torch.constant.int 2097152 %2624 = torch.prim.ListConstruct %416, %int2097152_2877 : (!torch.int, !torch.int) -> !torch.list<int> %2625 = torch.aten.view %2623, %2624 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2625, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_2878 = torch.constant.int -2 %2626 = torch.aten.unsqueeze %2584, %int-2_2878 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2626, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_2879 = torch.constant.int 4 %int8_2880 = torch.constant.int 8 %int4_2881 = torch.constant.int 4 %int128_2882 = torch.constant.int 128 %2627 = torch.prim.ListConstruct %int4_2879, %2570, %int8_2880, %int4_2881, %int128_2882 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_2883 = torch.constant.bool false %2628 = torch.aten.expand %2626, %2627, %false_2883 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2628, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_2884 = torch.constant.int 0 %2629 = torch.aten.clone %2628, %int0_2884 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2629, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_2885 = torch.constant.int 4 %int32_2886 = torch.constant.int 32 %int128_2887 = torch.constant.int 128 %2630 = torch.prim.ListConstruct %int4_2885, %2570, %int32_2886, %int128_2887 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2631 = torch.aten._unsafe_view %2629, %2630 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2631, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_2888 = torch.constant.int -2 %2632 = torch.aten.unsqueeze %2526, %int-2_2888 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2632, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_2889 = torch.constant.int 1 %2633 = torch.aten.size.int %2520, %int1_2889 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_2890 = torch.constant.int 4 %int8_2891 = torch.constant.int 8 %int4_2892 = torch.constant.int 4 %int128_2893 = torch.constant.int 128 %2634 = torch.prim.ListConstruct %int4_2890, %2633, %int8_2891, %int4_2892, %int128_2893 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_2894 = torch.constant.bool false %2635 = torch.aten.expand %2632, %2634, %false_2894 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape 
%2635, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_2895 = torch.constant.int 0 %2636 = torch.aten.clone %2635, %int0_2895 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2636, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_2896 = torch.constant.int 4 %int32_2897 = torch.constant.int 32 %int128_2898 = torch.constant.int 128 %2637 = torch.prim.ListConstruct %int4_2896, %2633, %int32_2897, %int128_2898 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2638 = torch.aten._unsafe_view %2636, %2637 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2638, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_2899 = torch.constant.int 1 %int2_2900 = torch.constant.int 2 %2639 = torch.aten.transpose.int %2555, %int1_2899, %int2_2900 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2639, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2901 = torch.constant.int 1 %int2_2902 = torch.constant.int 2 %2640 = torch.aten.transpose.int %2631, %int1_2901, %int2_2902 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2640, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2903 = torch.constant.int 1 %int2_2904 = torch.constant.int 2 %2641 = torch.aten.transpose.int %2638, %int1_2903, %int2_2904 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2641, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_2905 = torch.constant.float 0.000000e+00 %false_2906 = torch.constant.bool false %none_2907 = torch.constant.none %2642:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%2639, %2640, %2641, %float0.000000e00_2905, %false_2906, %320, %none_2907) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %2642#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_2908 = torch.constant.int 1 %int2_2909 = torch.constant.int 2 %2643 = torch.aten.transpose.int %2642#0, %int1_2908, %int2_2909 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2643, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_2910 = torch.constant.int 4 %int4096_2911 = torch.constant.int 4096 %2644 = torch.prim.ListConstruct %int4_2910, %2541, %int4096_2911 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2645 = torch.aten.view %2643, %2644 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2645, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2912 = torch.constant.int -2 %int-1_2913 = torch.constant.int -1 %2646 = torch.aten.transpose.int %104, %int-2_2912, %int-1_2913 : !torch.vtensor<[4096,4096],f16>, 
!torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_2914 = torch.constant.int 4 %2647 = torch.aten.mul.int %int4_2914, %2541 : !torch.int, !torch.int -> !torch.int %int4096_2915 = torch.constant.int 4096 %2648 = torch.prim.ListConstruct %2647, %int4096_2915 : (!torch.int, !torch.int) -> !torch.list<int> %2649 = torch.aten.view %2645, %2648 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2649, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2650 = torch.aten.mm %2649, %2646 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2650, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2916 = torch.constant.int 4 %int4096_2917 = torch.constant.int 4096 %2651 = torch.prim.ListConstruct %int4_2916, %2541, %int4096_2917 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2652 = torch.aten.view %2650, %2651 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2652, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_2918 = torch.constant.int 1 %2653 = torch.aten.add.Tensor %2490, %2652, %int1_2918 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2653, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_2919 = torch.constant.int 6 %2654 = torch.prims.convert_element_type %2653, %int6_2919 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2654, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_2920 = torch.constant.int 2 %2655 = torch.aten.pow.Tensor_Scalar %2654, %int2_2920 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2655, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_2921 = torch.constant.int -1 %2656 = torch.prim.ListConstruct %int-1_2921 : (!torch.int) -> !torch.list<int> %true_2922 = torch.constant.bool true %none_2923 = torch.constant.none %2657 = torch.aten.mean.dim %2655, %2656, %true_2922, %none_2923 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2657, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_2924 = torch.constant.float 9.9999997473787516E-6 %int1_2925 = torch.constant.int 1 %2658 = torch.aten.add.Scalar %2657, %float9.999990e-06_2924, %int1_2925 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2658, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2659 = torch.aten.rsqrt %2658 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2659, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2660 = torch.aten.mul.Tensor %2654, %2659 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2660, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2661 = torch.aten.mul.Tensor %105, %2660 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> 
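// SwiGLU feed-forward on the RMSNorm output: silu(x @ transpose(%106)) *
// (x @ transpose(%107)), projected back through transpose(%108) and added to
// the residual stream in the lines that follow.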
torch.bind_symbolic_shape %2661, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_2926 = torch.constant.int 5 %2662 = torch.prims.convert_element_type %2661, %int5_2926 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2662, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2927 = torch.constant.int -2 %int-1_2928 = torch.constant.int -1 %2663 = torch.aten.transpose.int %106, %int-2_2927, %int-1_2928 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_2929 = torch.constant.int 4 %2664 = torch.aten.mul.int %int4_2929, %294 : !torch.int, !torch.int -> !torch.int %int4096_2930 = torch.constant.int 4096 %2665 = torch.prim.ListConstruct %2664, %int4096_2930 : (!torch.int, !torch.int) -> !torch.list<int> %2666 = torch.aten.view %2662, %2665 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2666, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2667 = torch.aten.mm %2666, %2663 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2667, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_2931 = torch.constant.int 4 %int14336_2932 = torch.constant.int 14336 %2668 = torch.prim.ListConstruct %int4_2931, %294, %int14336_2932 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2669 = torch.aten.view %2667, %2668 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2669, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2670 = torch.aten.silu %2669 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2670, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_2933 = torch.constant.int -2 %int-1_2934 = torch.constant.int -1 %2671 = torch.aten.transpose.int %107, %int-2_2933, %int-1_2934 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_2935 = torch.constant.int 4 %2672 = torch.aten.mul.int %int4_2935, %294 : !torch.int, !torch.int -> !torch.int %int4096_2936 = torch.constant.int 4096 %2673 = torch.prim.ListConstruct %2672, %int4096_2936 : (!torch.int, !torch.int) -> !torch.list<int> %2674 = torch.aten.view %2662, %2673 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2674, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2675 = torch.aten.mm %2674, %2671 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2675, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_2937 = torch.constant.int 4 %int14336_2938 = torch.constant.int 14336 %2676 = torch.prim.ListConstruct %int4_2937, %294, %int14336_2938 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2677 = torch.aten.view %2675, %2676 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2677, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2678 = torch.aten.mul.Tensor %2670, %2677 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> 
!torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2678, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_2939 = torch.constant.int -2 %int-1_2940 = torch.constant.int -1 %2679 = torch.aten.transpose.int %108, %int-2_2939, %int-1_2940 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_2941 = torch.constant.int 1 %2680 = torch.aten.size.int %2669, %int1_2941 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_2942 = torch.constant.int 4 %2681 = torch.aten.mul.int %int4_2942, %2680 : !torch.int, !torch.int -> !torch.int %int14336_2943 = torch.constant.int 14336 %2682 = torch.prim.ListConstruct %2681, %int14336_2943 : (!torch.int, !torch.int) -> !torch.list<int> %2683 = torch.aten.view %2678, %2682 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2683, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %2684 = torch.aten.mm %2683, %2679 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2684, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2944 = torch.constant.int 4 %int4096_2945 = torch.constant.int 4096 %2685 = torch.prim.ListConstruct %int4_2944, %2680, %int4096_2945 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2686 = torch.aten.view %2684, %2685 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2686, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_2946 = torch.constant.int 1 %2687 = torch.aten.add.Tensor %2653, %2686, %int1_2946 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2687, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_2947 = torch.constant.int 6 %2688 = torch.prims.convert_element_type %2687, %int6_2947 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2688, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_2948 = torch.constant.int 2 %2689 = torch.aten.pow.Tensor_Scalar %2688, %int2_2948 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2689, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_2949 = torch.constant.int -1 %2690 = torch.prim.ListConstruct %int-1_2949 : (!torch.int) -> !torch.list<int> %true_2950 = torch.constant.bool true %none_2951 = torch.constant.none %2691 = torch.aten.mean.dim %2689, %2690, %true_2950, %none_2951 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2691, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_2952 = torch.constant.float 9.9999997473787516E-6 %int1_2953 = torch.constant.int 1 %2692 = torch.aten.add.Scalar %2691, %float9.999990e-06_2952, %int1_2953 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2692, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2693 = torch.aten.rsqrt %2692 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2693, [%292], 
affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2694 = torch.aten.mul.Tensor %2688, %2693 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2694, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2695 = torch.aten.mul.Tensor %109, %2694 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2695, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_2954 = torch.constant.int 5 %2696 = torch.prims.convert_element_type %2695, %int5_2954 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2696, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2955 = torch.constant.int -2 %int-1_2956 = torch.constant.int -1 %2697 = torch.aten.transpose.int %110, %int-2_2955, %int-1_2956 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_2957 = torch.constant.int 4 %2698 = torch.aten.mul.int %int4_2957, %294 : !torch.int, !torch.int -> !torch.int %int4096_2958 = torch.constant.int 4096 %2699 = torch.prim.ListConstruct %2698, %int4096_2958 : (!torch.int, !torch.int) -> !torch.list<int> %2700 = torch.aten.view %2696, %2699 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2700, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2701 = torch.aten.mm %2700, %2697 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2701, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_2959 = torch.constant.int 4 %int4096_2960 = torch.constant.int 4096 %2702 = torch.prim.ListConstruct %int4_2959, %294, %int4096_2960 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2703 = torch.aten.view %2701, %2702 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2703, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_2961 = torch.constant.int -2 %int-1_2962 = torch.constant.int -1 %2704 = torch.aten.transpose.int %111, %int-2_2961, %int-1_2962 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_2963 = torch.constant.int 4 %2705 = torch.aten.mul.int %int4_2963, %294 : !torch.int, !torch.int -> !torch.int %int4096_2964 = torch.constant.int 4096 %2706 = torch.prim.ListConstruct %2705, %int4096_2964 : (!torch.int, !torch.int) -> !torch.list<int> %2707 = torch.aten.view %2696, %2706 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2707, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2708 = torch.aten.mm %2707, %2704 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2708, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_2965 = torch.constant.int 4 %int1024_2966 = torch.constant.int 1024 %2709 = torch.prim.ListConstruct %int4_2965, %294, %int1024_2966 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2710 = torch.aten.view %2708, %2709 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> 
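// %2688-%2696 repeat the RMSNorm pattern (weight %109, likely the next block's
// attn_norm.weight), and %2701/%2710 are its Q and K projections. The shapes show
// the grouped-query layout: Q maps 4096 -> 4096 (32 heads x 128) while K and V map
// 4096 -> 1024 (8 KV heads x 128), so each KV head serves 4 query heads.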
torch.bind_symbolic_shape %2710, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_2967 = torch.constant.int -2 %int-1_2968 = torch.constant.int -1 %2711 = torch.aten.transpose.int %112, %int-2_2967, %int-1_2968 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_2969 = torch.constant.int 4 %2712 = torch.aten.mul.int %int4_2969, %294 : !torch.int, !torch.int -> !torch.int %int4096_2970 = torch.constant.int 4096 %2713 = torch.prim.ListConstruct %2712, %int4096_2970 : (!torch.int, !torch.int) -> !torch.list<int> %2714 = torch.aten.view %2696, %2713 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2714, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2715 = torch.aten.mm %2714, %2711 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2715, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_2971 = torch.constant.int 4 %int1024_2972 = torch.constant.int 1024 %2716 = torch.prim.ListConstruct %int4_2971, %294, %int1024_2972 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2717 = torch.aten.view %2715, %2716 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %2717, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_2973 = torch.constant.int 4 %int32_2974 = torch.constant.int 32 %int128_2975 = torch.constant.int 128 %2718 = torch.prim.ListConstruct %int4_2973, %294, %int32_2974, %int128_2975 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2719 = torch.aten.view %2703, %2718 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2719, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_2976 = torch.constant.int 4 %int8_2977 = torch.constant.int 8 %int128_2978 = torch.constant.int 128 %2720 = torch.prim.ListConstruct %int4_2976, %294, %int8_2977, %int128_2978 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2721 = torch.aten.view %2710, %2720 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2721, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_2979 = torch.constant.int 4 %int8_2980 = torch.constant.int 8 %int128_2981 = torch.constant.int 128 %2722 = torch.prim.ListConstruct %int4_2979, %294, %int8_2980, %int128_2981 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2723 = torch.aten.view %2717, %2722 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2723, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_2982 = torch.constant.int 131072 %none_2983 = torch.constant.none %none_2984 = torch.constant.none %cpu_2985 = torch.constant.device "cpu" %false_2986 = torch.constant.bool false %2724 = torch.aten.arange %int131072_2982, %none_2983, %none_2984, %cpu_2985, %false_2986 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_2987 = torch.constant.int 0 %int128_2988 = torch.constant.int 128 %int2_2989 = torch.constant.int 2 %none_2990 = torch.constant.none %none_2991 = torch.constant.none 
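// The V projection (%2715, weight %112) and the head-splitting views end here;
// the constants that follow start the rotary-embedding (RoPE) table: positions
// t = arange(131072) and inv_freq_i = 1 / 500000^(2i/128) for i = 0..63 are
// combined into a [131072,64] complex<f32> table cos(t*f) + j*sin(t*f), later
// sliced to the live sequence length. 500000 is the rope_theta base and 131072
// the maximum context length implied by the table size.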
%cpu_2992 = torch.constant.device "cpu" %false_2993 = torch.constant.bool false %2725 = torch.aten.arange.start_step %int0_2987, %int128_2988, %int2_2989, %none_2990, %none_2991, %cpu_2992, %false_2993 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_2994 = torch.constant.int 0 %int0_2995 = torch.constant.int 0 %int64_2996 = torch.constant.int 64 %int1_2997 = torch.constant.int 1 %2726 = torch.aten.slice.Tensor %2725, %int0_2994, %int0_2995, %int64_2996, %int1_2997 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_2998 = torch.constant.int 6 %2727 = torch.prims.convert_element_type %2726, %int6_2998 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_2999 = torch.constant.int 128 %2728 = torch.aten.div.Scalar %2727, %int128_2999 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_3000 = torch.constant.float 5.000000e+05 %2729 = torch.aten.pow.Scalar %float5.000000e05_3000, %2728 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2730 = torch.aten.reciprocal %2729 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_3001 = torch.constant.float 1.000000e+00 %2731 = torch.aten.mul.Scalar %2730, %float1.000000e00_3001 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_3002 = torch.constant.int 131072 %int1_3003 = torch.constant.int 1 %2732 = torch.prim.ListConstruct %int131072_3002, %int1_3003 : (!torch.int, !torch.int) -> !torch.list<int> %2733 = torch.aten.view %2724, %2732 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %2734 = torch.aten.mul.Tensor %2733, %2731 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2735 = torch.aten.cos %2734 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2736 = torch.aten.sin %2734 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2737 = torch.aten.complex %2735, %2736 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_3004 = torch.constant.int 1 %2738 = torch.aten.size.int %2703, %int1_3004 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_3005 = torch.constant.int 0 %2739 = torch.aten.add.int %int0_3005, %2738 : !torch.int, !torch.int -> !torch.int %int0_3006 = torch.constant.int 0 %int0_3007 = torch.constant.int 0 %int1_3008 = torch.constant.int 1 %2740 = torch.aten.slice.Tensor %2737, %int0_3006, %int0_3007, %2739, %int1_3008 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2740, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_3009 = torch.constant.int 1 %int0_3010 = torch.constant.int 0 %int9223372036854775807_3011 = torch.constant.int 9223372036854775807 %int1_3012 = torch.constant.int 1 %2741 = torch.aten.slice.Tensor %2740, %int1_3009, %int0_3010, %int9223372036854775807_3011, %int1_3012 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2741, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_3013 = torch.constant.int 0 %2742 = torch.aten.unsqueeze %2741, %int0_3013 : !torch.vtensor<[?,64],complex<f32>>, 
!torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2742, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_3014 = torch.constant.int 2 %2743 = torch.aten.unsqueeze %2742, %int2_3014 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2743, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_3015 = torch.constant.int 3 %int0_3016 = torch.constant.int 0 %int9223372036854775807_3017 = torch.constant.int 9223372036854775807 %int1_3018 = torch.constant.int 1 %2744 = torch.aten.slice.Tensor %2743, %int3_3015, %int0_3016, %int9223372036854775807_3017, %int1_3018 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2744, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %2745 = torch_c.to_builtin_tensor %2719 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_3019 = arith.constant 1 : index %dim_3020 = tensor.dim %2745, %c1_3019 : tensor<4x?x32x128xf16> %2746 = flow.tensor.bitcast %2745 : tensor<4x?x32x128xf16>{%dim_3020} -> tensor<4x?x32x64xcomplex<f16>>{%dim_3020} %2747 = torch_c.from_builtin_tensor %2746 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %2747, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %2748 = torch.aten.mul.Tensor %2747, %2744 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %2748, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %2749 = torch_c.to_builtin_tensor %2748 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_3021 = arith.constant 1 : index %dim_3022 = tensor.dim %2749, %c1_3021 : tensor<4x?x32x64xcomplex<f32>> %2750 = flow.tensor.bitcast %2749 : tensor<4x?x32x64xcomplex<f32>>{%dim_3022} -> tensor<4x?x32x128xf32>{%dim_3022} %2751 = torch_c.from_builtin_tensor %2750 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %2751, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_3023 = torch.constant.int 5 %2752 = torch.prims.convert_element_type %2751, %int5_3023 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2752, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_3024 = torch.constant.int 131072 %none_3025 = torch.constant.none %none_3026 = torch.constant.none %cpu_3027 = torch.constant.device "cpu" %false_3028 = torch.constant.bool false %2753 = torch.aten.arange %int131072_3024, %none_3025, %none_3026, %cpu_3027, %false_3028 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_3029 = torch.constant.int 0 %int128_3030 = torch.constant.int 128 %int2_3031 = torch.constant.int 2 %none_3032 = torch.constant.none %none_3033 = torch.constant.none %cpu_3034 = torch.constant.device "cpu" %false_3035 = torch.constant.bool false %2754 = torch.aten.arange.start_step %int0_3029, %int128_3030, %int2_3031, %none_3032, %none_3033, %cpu_3034, %false_3035 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, 
!torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_3036 = torch.constant.int 0 %int0_3037 = torch.constant.int 0 %int64_3038 = torch.constant.int 64 %int1_3039 = torch.constant.int 1 %2755 = torch.aten.slice.Tensor %2754, %int0_3036, %int0_3037, %int64_3038, %int1_3039 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_3040 = torch.constant.int 6 %2756 = torch.prims.convert_element_type %2755, %int6_3040 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_3041 = torch.constant.int 128 %2757 = torch.aten.div.Scalar %2756, %int128_3041 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_3042 = torch.constant.float 5.000000e+05 %2758 = torch.aten.pow.Scalar %float5.000000e05_3042, %2757 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2759 = torch.aten.reciprocal %2758 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_3043 = torch.constant.float 1.000000e+00 %2760 = torch.aten.mul.Scalar %2759, %float1.000000e00_3043 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_3044 = torch.constant.int 131072 %int1_3045 = torch.constant.int 1 %2761 = torch.prim.ListConstruct %int131072_3044, %int1_3045 : (!torch.int, !torch.int) -> !torch.list<int> %2762 = torch.aten.view %2753, %2761 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %2763 = torch.aten.mul.Tensor %2762, %2760 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2764 = torch.aten.cos %2763 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2765 = torch.aten.sin %2763 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2766 = torch.aten.complex %2764, %2765 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_3046 = torch.constant.int 1 %2767 = torch.aten.size.int %2710, %int1_3046 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_3047 = torch.constant.int 0 %2768 = torch.aten.add.int %int0_3047, %2767 : !torch.int, !torch.int -> !torch.int %int0_3048 = torch.constant.int 0 %int0_3049 = torch.constant.int 0 %int1_3050 = torch.constant.int 1 %2769 = torch.aten.slice.Tensor %2766, %int0_3048, %int0_3049, %2768, %int1_3050 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2769, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_3051 = torch.constant.int 1 %int0_3052 = torch.constant.int 0 %int9223372036854775807_3053 = torch.constant.int 9223372036854775807 %int1_3054 = torch.constant.int 1 %2770 = torch.aten.slice.Tensor %2769, %int1_3051, %int0_3052, %int9223372036854775807_3053, %int1_3054 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2770, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_3055 = torch.constant.int 0 %2771 = torch.aten.unsqueeze %2770, %int0_3055 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2771, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_3056 = torch.constant.int 2 %2772 = torch.aten.unsqueeze %2771, %int2_3056 : 
!torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2772, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_3057 = torch.constant.int 3 %int0_3058 = torch.constant.int 0 %int9223372036854775807_3059 = torch.constant.int 9223372036854775807 %int1_3060 = torch.constant.int 1 %2773 = torch.aten.slice.Tensor %2772, %int3_3057, %int0_3058, %int9223372036854775807_3059, %int1_3060 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2773, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %2774 = torch_c.to_builtin_tensor %2721 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_3061 = arith.constant 1 : index %dim_3062 = tensor.dim %2774, %c1_3061 : tensor<4x?x8x128xf16> %2775 = flow.tensor.bitcast %2774 : tensor<4x?x8x128xf16>{%dim_3062} -> tensor<4x?x8x64xcomplex<f16>>{%dim_3062} %2776 = torch_c.from_builtin_tensor %2775 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %2776, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %2777 = torch.aten.mul.Tensor %2776, %2773 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %2777, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %2778 = torch_c.to_builtin_tensor %2777 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_3063 = arith.constant 1 : index %dim_3064 = tensor.dim %2778, %c1_3063 : tensor<4x?x8x64xcomplex<f32>> %2779 = flow.tensor.bitcast %2778 : tensor<4x?x8x64xcomplex<f32>>{%dim_3064} -> tensor<4x?x8x128xf32>{%dim_3064} %2780 = torch_c.from_builtin_tensor %2779 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %2780, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_3065 = torch.constant.int 5 %2781 = torch.prims.convert_element_type %2780, %int5_3065 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2781, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_3066 = torch.constant.int 64 %2782 = torch.aten.mul.Scalar %arg2, %int64_3066 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2782, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int24 = torch.constant.int 24 %int1_3067 = torch.constant.int 1 %2783 = torch.aten.add.Scalar %2782, %int24, %int1_3067 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2783, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_3068 = torch.constant.int 4 %int32_3069 = torch.constant.int 32 %int8_3070 = torch.constant.int 8 %int128_3071 = torch.constant.int 128 %2784 = torch.prim.ListConstruct %int4_3068, %425, %int32_3069, %int8_3070, %int128_3071 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2785 = torch.aten.view %2781, %2784 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2785, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : 
!torch.vtensor<[4,?,32,8,128],f16> %int4_3072 = torch.constant.int 4 %2786 = torch.aten.mul.int %int4_3072, %425 : !torch.int, !torch.int -> !torch.int %int32_3073 = torch.constant.int 32 %int8_3074 = torch.constant.int 8 %int128_3075 = torch.constant.int 128 %2787 = torch.prim.ListConstruct %2786, %int32_3073, %int8_3074, %int128_3075 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2788 = torch.aten.view %2785, %2787 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2788, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_3076 = torch.constant.int 4 %2789 = torch.aten.mul.int %int4_3076, %425 : !torch.int, !torch.int -> !torch.int %2790 = torch.prim.ListConstruct %2789 : (!torch.int) -> !torch.list<int> %2791 = torch.aten.view %2783, %2790 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2791, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_3077 = torch.constant.int 32 %int2_3078 = torch.constant.int 2 %int32_3079 = torch.constant.int 32 %int8_3080 = torch.constant.int 8 %int128_3081 = torch.constant.int 128 %2792 = torch.prim.ListConstruct %416, %int32_3077, %int2_3078, %int32_3079, %int8_3080, %int128_3081 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2793 = torch.aten.view %2625, %2792 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2793, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_3082 = torch.constant.int 32 %2794 = torch.aten.mul.int %416, %int32_3082 : !torch.int, !torch.int -> !torch.int %int2_3083 = torch.constant.int 2 %2795 = torch.aten.mul.int %2794, %int2_3083 : !torch.int, !torch.int -> !torch.int %int32_3084 = torch.constant.int 32 %int8_3085 = torch.constant.int 8 %int128_3086 = torch.constant.int 128 %2796 = torch.prim.ListConstruct %2795, %int32_3084, %int8_3085, %int128_3086 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2797 = torch.aten.view %2793, %2796 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2797, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %2798 = torch.prim.ListConstruct %2791 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_3087 = torch.constant.bool false %2799 = torch.aten.index_put %2797, %2798, %2788, %false_3087 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2799, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_3088 = torch.constant.int 32 %int2_3089 = torch.constant.int 2 %int32_3090 = torch.constant.int 32 %int8_3091 = torch.constant.int 8 %int128_3092 = torch.constant.int 128 %2800 = torch.prim.ListConstruct %416, %int32_3088, %int2_3089, %int32_3090, %int8_3091, %int128_3092 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2801 = torch.aten.view %2799, %2800 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2801, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> 
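// %2782-%2801 above scatter the rotated K states into the shared KV cache %2625.
// Each 2097152-element cache page is viewed as [32, 2, 32 tokens, 8 KV heads, 128],
// and the flat slot index %2783 = page_id * 64 + 24 appears to address
// (layer, K-or-V) = (12, K), since 64 = 32 layers x 2 and the matching V scatter
// below reuses the same index plus 1. The layer number is an inference from the
// offset arithmetic, not stated anywhere in the IR.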
%int2097152_3093 = torch.constant.int 2097152 %2802 = torch.prim.ListConstruct %416, %int2097152_3093 : (!torch.int, !torch.int) -> !torch.list<int> %2803 = torch.aten.view %2801, %2802 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2803, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_3094 = torch.constant.int 32 %int2_3095 = torch.constant.int 2 %int32_3096 = torch.constant.int 32 %int8_3097 = torch.constant.int 8 %int128_3098 = torch.constant.int 128 %2804 = torch.prim.ListConstruct %416, %int32_3094, %int2_3095, %int32_3096, %int8_3097, %int128_3098 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2805 = torch.aten.view %2803, %2804 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2805, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_3099 = torch.constant.int 32 %int8_3100 = torch.constant.int 8 %int128_3101 = torch.constant.int 128 %2806 = torch.prim.ListConstruct %2795, %int32_3099, %int8_3100, %int128_3101 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2807 = torch.aten.view %2805, %2806 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2807, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_3102 = torch.constant.int 4 %int32_3103 = torch.constant.int 32 %int8_3104 = torch.constant.int 8 %int128_3105 = torch.constant.int 128 %2808 = torch.prim.ListConstruct %int4_3102, %425, %int32_3103, %int8_3104, %int128_3105 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2809 = torch.aten.view %2723, %2808 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2809, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_3106 = torch.constant.int 4 %2810 = torch.aten.mul.int %int4_3106, %425 : !torch.int, !torch.int -> !torch.int %int32_3107 = torch.constant.int 32 %int8_3108 = torch.constant.int 8 %int128_3109 = torch.constant.int 128 %2811 = torch.prim.ListConstruct %2810, %int32_3107, %int8_3108, %int128_3109 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2812 = torch.aten.view %2809, %2811 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2812, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_3110 = torch.constant.int 1 %int1_3111 = torch.constant.int 1 %2813 = torch.aten.add.Scalar %2783, %int1_3110, %int1_3111 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2813, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_3112 = torch.constant.int 4 %2814 = torch.aten.mul.int %int4_3112, %425 : !torch.int, !torch.int -> !torch.int %2815 = torch.prim.ListConstruct %2814 : (!torch.int) -> !torch.list<int> %2816 = torch.aten.view %2813, %2815 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2816, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %2817 = torch.prim.ListConstruct %2816 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_3113 = 
torch.constant.bool false %2818 = torch.aten.index_put %2807, %2817, %2812, %false_3113 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2818, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_3114 = torch.constant.int 32 %int2_3115 = torch.constant.int 2 %int32_3116 = torch.constant.int 32 %int8_3117 = torch.constant.int 8 %int128_3118 = torch.constant.int 128 %2819 = torch.prim.ListConstruct %416, %int32_3114, %int2_3115, %int32_3116, %int8_3117, %int128_3118 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2820 = torch.aten.view %2818, %2819 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2820, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_3119 = torch.constant.int 2097152 %2821 = torch.prim.ListConstruct %416, %int2097152_3119 : (!torch.int, !torch.int) -> !torch.list<int> %2822 = torch.aten.view %2820, %2821 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %2822, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_3120 = torch.constant.int -2 %2823 = torch.aten.unsqueeze %2781, %int-2_3120 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2823, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_3121 = torch.constant.int 4 %int8_3122 = torch.constant.int 8 %int4_3123 = torch.constant.int 4 %int128_3124 = torch.constant.int 128 %2824 = torch.prim.ListConstruct %int4_3121, %2767, %int8_3122, %int4_3123, %int128_3124 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_3125 = torch.constant.bool false %2825 = torch.aten.expand %2823, %2824, %false_3125 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2825, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_3126 = torch.constant.int 0 %2826 = torch.aten.clone %2825, %int0_3126 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2826, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_3127 = torch.constant.int 4 %int32_3128 = torch.constant.int 32 %int128_3129 = torch.constant.int 128 %2827 = torch.prim.ListConstruct %int4_3127, %2767, %int32_3128, %int128_3129 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2828 = torch.aten._unsafe_view %2826, %2827 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2828, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_3130 = torch.constant.int -2 %2829 = torch.aten.unsqueeze %2723, %int-2_3130 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %2829, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_3131 = torch.constant.int 1 %2830 = torch.aten.size.int %2717, %int1_3131 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_3132 = 
torch.constant.int 4 %int8_3133 = torch.constant.int 8 %int4_3134 = torch.constant.int 4 %int128_3135 = torch.constant.int 128 %2831 = torch.prim.ListConstruct %int4_3132, %2830, %int8_3133, %int4_3134, %int128_3135 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_3136 = torch.constant.bool false %2832 = torch.aten.expand %2829, %2831, %false_3136 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2832, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_3137 = torch.constant.int 0 %2833 = torch.aten.clone %2832, %int0_3137 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %2833, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_3138 = torch.constant.int 4 %int32_3139 = torch.constant.int 32 %int128_3140 = torch.constant.int 128 %2834 = torch.prim.ListConstruct %int4_3138, %2830, %int32_3139, %int128_3140 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2835 = torch.aten._unsafe_view %2833, %2834 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2835, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_3141 = torch.constant.int 1 %int2_3142 = torch.constant.int 2 %2836 = torch.aten.transpose.int %2752, %int1_3141, %int2_3142 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2836, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3143 = torch.constant.int 1 %int2_3144 = torch.constant.int 2 %2837 = torch.aten.transpose.int %2828, %int1_3143, %int2_3144 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2837, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3145 = torch.constant.int 1 %int2_3146 = torch.constant.int 2 %2838 = torch.aten.transpose.int %2835, %int1_3145, %int2_3146 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %2838, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_3147 = torch.constant.float 0.000000e+00 %false_3148 = torch.constant.bool false %none_3149 = torch.constant.none %2839:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%2836, %2837, %2838, %float0.000000e00_3147, %false_3148, %320, %none_3149) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %2839#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3150 = torch.constant.int 1 %int2_3151 = torch.constant.int 2 %2840 = torch.aten.transpose.int %2839#0, %int1_3150, %int2_3151 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2840, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_3152 = torch.constant.int 4 %int4096_3153 = torch.constant.int 4096 %2841 = 
torch.prim.ListConstruct %int4_3152, %2738, %int4096_3153 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2842 = torch.aten.view %2840, %2841 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2842, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3154 = torch.constant.int -2 %int-1_3155 = torch.constant.int -1 %2843 = torch.aten.transpose.int %113, %int-2_3154, %int-1_3155 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_3156 = torch.constant.int 4 %2844 = torch.aten.mul.int %int4_3156, %2738 : !torch.int, !torch.int -> !torch.int %int4096_3157 = torch.constant.int 4096 %2845 = torch.prim.ListConstruct %2844, %int4096_3157 : (!torch.int, !torch.int) -> !torch.list<int> %2846 = torch.aten.view %2842, %2845 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2846, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2847 = torch.aten.mm %2846, %2843 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2847, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3158 = torch.constant.int 4 %int4096_3159 = torch.constant.int 4096 %2848 = torch.prim.ListConstruct %int4_3158, %2738, %int4096_3159 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2849 = torch.aten.view %2847, %2848 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2849, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_3160 = torch.constant.int 1 %2850 = torch.aten.add.Tensor %2687, %2849, %int1_3160 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2850, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_3161 = torch.constant.int 6 %2851 = torch.prims.convert_element_type %2850, %int6_3161 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2851, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_3162 = torch.constant.int 2 %2852 = torch.aten.pow.Tensor_Scalar %2851, %int2_3162 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2852, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_3163 = torch.constant.int -1 %2853 = torch.prim.ListConstruct %int-1_3163 : (!torch.int) -> !torch.list<int> %true_3164 = torch.constant.bool true %none_3165 = torch.constant.none %2854 = torch.aten.mean.dim %2852, %2853, %true_3164, %none_3165 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2854, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_3166 = torch.constant.float 9.9999997473787516E-6 %int1_3167 = torch.constant.int 1 %2855 = torch.aten.add.Scalar %2854, %float9.999990e-06_3166, %int1_3167 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2855, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2856 = torch.aten.rsqrt %2855 : !torch.vtensor<[4,?,1],f32> -> 
!torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2856, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2857 = torch.aten.mul.Tensor %2851, %2856 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2857, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2858 = torch.aten.mul.Tensor %114, %2857 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2858, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_3168 = torch.constant.int 5 %2859 = torch.prims.convert_element_type %2858, %int5_3168 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2859, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3169 = torch.constant.int -2 %int-1_3170 = torch.constant.int -1 %2860 = torch.aten.transpose.int %115, %int-2_3169, %int-1_3170 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_3171 = torch.constant.int 4 %2861 = torch.aten.mul.int %int4_3171, %294 : !torch.int, !torch.int -> !torch.int %int4096_3172 = torch.constant.int 4096 %2862 = torch.prim.ListConstruct %2861, %int4096_3172 : (!torch.int, !torch.int) -> !torch.list<int> %2863 = torch.aten.view %2859, %2862 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2863, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2864 = torch.aten.mm %2863, %2860 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2864, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_3173 = torch.constant.int 4 %int14336_3174 = torch.constant.int 14336 %2865 = torch.prim.ListConstruct %int4_3173, %294, %int14336_3174 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2866 = torch.aten.view %2864, %2865 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2866, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2867 = torch.aten.silu %2866 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2867, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_3175 = torch.constant.int -2 %int-1_3176 = torch.constant.int -1 %2868 = torch.aten.transpose.int %116, %int-2_3175, %int-1_3176 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_3177 = torch.constant.int 4 %2869 = torch.aten.mul.int %int4_3177, %294 : !torch.int, !torch.int -> !torch.int %int4096_3178 = torch.constant.int 4096 %2870 = torch.prim.ListConstruct %2869, %int4096_3178 : (!torch.int, !torch.int) -> !torch.list<int> %2871 = torch.aten.view %2859, %2870 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2871, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2872 = torch.aten.mm %2871, %2868 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2872, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_3179 = torch.constant.int 4 
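// The per-layer pattern repeats: %2850 is the post-attention residual, %2851-%2859
// RMSNorm (weight %114), and %2864/%2872 the gate and up projections of a SwiGLU
// FFN, down(silu(x @ W_gate^T) * (x @ W_up^T)) with hidden size 14336; the down
// projection (weight %117) and its residual add follow just below.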
%int14336_3180 = torch.constant.int 14336 %2873 = torch.prim.ListConstruct %int4_3179, %294, %int14336_3180 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2874 = torch.aten.view %2872, %2873 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2874, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %2875 = torch.aten.mul.Tensor %2867, %2874 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %2875, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_3181 = torch.constant.int -2 %int-1_3182 = torch.constant.int -1 %2876 = torch.aten.transpose.int %117, %int-2_3181, %int-1_3182 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_3183 = torch.constant.int 1 %2877 = torch.aten.size.int %2866, %int1_3183 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_3184 = torch.constant.int 4 %2878 = torch.aten.mul.int %int4_3184, %2877 : !torch.int, !torch.int -> !torch.int %int14336_3185 = torch.constant.int 14336 %2879 = torch.prim.ListConstruct %2878, %int14336_3185 : (!torch.int, !torch.int) -> !torch.list<int> %2880 = torch.aten.view %2875, %2879 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %2880, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %2881 = torch.aten.mm %2880, %2876 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2881, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3186 = torch.constant.int 4 %int4096_3187 = torch.constant.int 4096 %2882 = torch.prim.ListConstruct %int4_3186, %2877, %int4096_3187 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2883 = torch.aten.view %2881, %2882 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2883, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_3188 = torch.constant.int 1 %2884 = torch.aten.add.Tensor %2850, %2883, %int1_3188 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2884, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_3189 = torch.constant.int 6 %2885 = torch.prims.convert_element_type %2884, %int6_3189 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2885, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_3190 = torch.constant.int 2 %2886 = torch.aten.pow.Tensor_Scalar %2885, %int2_3190 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2886, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_3191 = torch.constant.int -1 %2887 = torch.prim.ListConstruct %int-1_3191 : (!torch.int) -> !torch.list<int> %true_3192 = torch.constant.bool true %none_3193 = torch.constant.none %2888 = torch.aten.mean.dim %2886, %2887, %true_3192, %none_3193 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2888, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : 
!torch.vtensor<[4,?,1],f32> %float9.999990e-06_3194 = torch.constant.float 9.9999997473787516E-6 %int1_3195 = torch.constant.int 1 %2889 = torch.aten.add.Scalar %2888, %float9.999990e-06_3194, %int1_3195 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2889, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2890 = torch.aten.rsqrt %2889 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %2890, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %2891 = torch.aten.mul.Tensor %2885, %2890 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2891, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %2892 = torch.aten.mul.Tensor %118, %2891 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %2892, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_3196 = torch.constant.int 5 %2893 = torch.prims.convert_element_type %2892, %int5_3196 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2893, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3197 = torch.constant.int -2 %int-1_3198 = torch.constant.int -1 %2894 = torch.aten.transpose.int %119, %int-2_3197, %int-1_3198 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_3199 = torch.constant.int 4 %2895 = torch.aten.mul.int %int4_3199, %294 : !torch.int, !torch.int -> !torch.int %int4096_3200 = torch.constant.int 4096 %2896 = torch.prim.ListConstruct %2895, %int4096_3200 : (!torch.int, !torch.int) -> !torch.list<int> %2897 = torch.aten.view %2893, %2896 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2897, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2898 = torch.aten.mm %2897, %2894 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2898, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3201 = torch.constant.int 4 %int4096_3202 = torch.constant.int 4096 %2899 = torch.prim.ListConstruct %int4_3201, %294, %int4096_3202 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2900 = torch.aten.view %2898, %2899 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %2900, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3203 = torch.constant.int -2 %int-1_3204 = torch.constant.int -1 %2901 = torch.aten.transpose.int %120, %int-2_3203, %int-1_3204 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_3205 = torch.constant.int 4 %2902 = torch.aten.mul.int %int4_3205, %294 : !torch.int, !torch.int -> !torch.int %int4096_3206 = torch.constant.int 4096 %2903 = torch.prim.ListConstruct %2902, %int4096_3206 : (!torch.int, !torch.int) -> !torch.list<int> %2904 = torch.aten.view %2893, %2903 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2904, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2905 = torch.aten.mm %2904, %2901 : 
!torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2905, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_3207 = torch.constant.int 4 %int1024_3208 = torch.constant.int 1024 %2906 = torch.prim.ListConstruct %int4_3207, %294, %int1024_3208 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2907 = torch.aten.view %2905, %2906 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %2907, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_3209 = torch.constant.int -2 %int-1_3210 = torch.constant.int -1 %2908 = torch.aten.transpose.int %121, %int-2_3209, %int-1_3210 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_3211 = torch.constant.int 4 %2909 = torch.aten.mul.int %int4_3211, %294 : !torch.int, !torch.int -> !torch.int %int4096_3212 = torch.constant.int 4096 %2910 = torch.prim.ListConstruct %2909, %int4096_3212 : (!torch.int, !torch.int) -> !torch.list<int> %2911 = torch.aten.view %2893, %2910 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %2911, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %2912 = torch.aten.mm %2911, %2908 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %2912, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_3213 = torch.constant.int 4 %int1024_3214 = torch.constant.int 1024 %2913 = torch.prim.ListConstruct %int4_3213, %294, %int1024_3214 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %2914 = torch.aten.view %2912, %2913 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %2914, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_3215 = torch.constant.int 4 %int32_3216 = torch.constant.int 32 %int128_3217 = torch.constant.int 128 %2915 = torch.prim.ListConstruct %int4_3215, %294, %int32_3216, %int128_3217 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2916 = torch.aten.view %2900, %2915 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2916, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_3218 = torch.constant.int 4 %int8_3219 = torch.constant.int 8 %int128_3220 = torch.constant.int 128 %2917 = torch.prim.ListConstruct %int4_3218, %294, %int8_3219, %int128_3220 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2918 = torch.aten.view %2907, %2917 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2918, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_3221 = torch.constant.int 4 %int8_3222 = torch.constant.int 8 %int128_3223 = torch.constant.int 128 %2919 = torch.prim.ListConstruct %int4_3221, %294, %int8_3222, %int128_3223 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2920 = torch.aten.view %2914, %2919 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2920, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_3224 = 
torch.constant.int 131072 %none_3225 = torch.constant.none %none_3226 = torch.constant.none %cpu_3227 = torch.constant.device "cpu" %false_3228 = torch.constant.bool false %2921 = torch.aten.arange %int131072_3224, %none_3225, %none_3226, %cpu_3227, %false_3228 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_3229 = torch.constant.int 0 %int128_3230 = torch.constant.int 128 %int2_3231 = torch.constant.int 2 %none_3232 = torch.constant.none %none_3233 = torch.constant.none %cpu_3234 = torch.constant.device "cpu" %false_3235 = torch.constant.bool false %2922 = torch.aten.arange.start_step %int0_3229, %int128_3230, %int2_3231, %none_3232, %none_3233, %cpu_3234, %false_3235 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_3236 = torch.constant.int 0 %int0_3237 = torch.constant.int 0 %int64_3238 = torch.constant.int 64 %int1_3239 = torch.constant.int 1 %2923 = torch.aten.slice.Tensor %2922, %int0_3236, %int0_3237, %int64_3238, %int1_3239 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_3240 = torch.constant.int 6 %2924 = torch.prims.convert_element_type %2923, %int6_3240 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_3241 = torch.constant.int 128 %2925 = torch.aten.div.Scalar %2924, %int128_3241 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_3242 = torch.constant.float 5.000000e+05 %2926 = torch.aten.pow.Scalar %float5.000000e05_3242, %2925 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2927 = torch.aten.reciprocal %2926 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_3243 = torch.constant.float 1.000000e+00 %2928 = torch.aten.mul.Scalar %2927, %float1.000000e00_3243 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_3244 = torch.constant.int 131072 %int1_3245 = torch.constant.int 1 %2929 = torch.prim.ListConstruct %int131072_3244, %int1_3245 : (!torch.int, !torch.int) -> !torch.list<int> %2930 = torch.aten.view %2921, %2929 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %2931 = torch.aten.mul.Tensor %2930, %2928 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2932 = torch.aten.cos %2931 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2933 = torch.aten.sin %2931 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2934 = torch.aten.complex %2932, %2933 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_3246 = torch.constant.int 1 %2935 = torch.aten.size.int %2900, %int1_3246 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_3247 = torch.constant.int 0 %2936 = torch.aten.add.int %int0_3247, %2935 : !torch.int, !torch.int -> !torch.int %int0_3248 = torch.constant.int 0 %int0_3249 = torch.constant.int 0 %int1_3250 = torch.constant.int 1 %2937 = torch.aten.slice.Tensor %2934, %int0_3248, %int0_3249, %2936, %int1_3250 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2937, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_3251 = torch.constant.int 1 %int0_3252 = torch.constant.int 0 %int9223372036854775807_3253 = 
torch.constant.int 9223372036854775807 %int1_3254 = torch.constant.int 1 %2938 = torch.aten.slice.Tensor %2937, %int1_3251, %int0_3252, %int9223372036854775807_3253, %int1_3254 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2938, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_3255 = torch.constant.int 0 %2939 = torch.aten.unsqueeze %2938, %int0_3255 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2939, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_3256 = torch.constant.int 2 %2940 = torch.aten.unsqueeze %2939, %int2_3256 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2940, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_3257 = torch.constant.int 3 %int0_3258 = torch.constant.int 0 %int9223372036854775807_3259 = torch.constant.int 9223372036854775807 %int1_3260 = torch.constant.int 1 %2941 = torch.aten.slice.Tensor %2940, %int3_3257, %int0_3258, %int9223372036854775807_3259, %int1_3260 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2941, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %2942 = torch_c.to_builtin_tensor %2916 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_3261 = arith.constant 1 : index %dim_3262 = tensor.dim %2942, %c1_3261 : tensor<4x?x32x128xf16> %2943 = flow.tensor.bitcast %2942 : tensor<4x?x32x128xf16>{%dim_3262} -> tensor<4x?x32x64xcomplex<f16>>{%dim_3262} %2944 = torch_c.from_builtin_tensor %2943 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %2944, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %2945 = torch.aten.mul.Tensor %2944, %2941 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %2945, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %2946 = torch_c.to_builtin_tensor %2945 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_3263 = arith.constant 1 : index %dim_3264 = tensor.dim %2946, %c1_3263 : tensor<4x?x32x64xcomplex<f32>> %2947 = flow.tensor.bitcast %2946 : tensor<4x?x32x64xcomplex<f32>>{%dim_3264} -> tensor<4x?x32x128xf32>{%dim_3264} %2948 = torch_c.from_builtin_tensor %2947 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %2948, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_3265 = torch.constant.int 5 %2949 = torch.prims.convert_element_type %2948, %int5_3265 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %2949, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_3266 = torch.constant.int 131072 %none_3267 = torch.constant.none %none_3268 = torch.constant.none %cpu_3269 = torch.constant.device "cpu" %false_3270 = torch.constant.bool false %2950 = torch.aten.arange %int131072_3266, %none_3267, %none_3268, %cpu_3269, %false_3270 : 
!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_3271 = torch.constant.int 0 %int128_3272 = torch.constant.int 128 %int2_3273 = torch.constant.int 2 %none_3274 = torch.constant.none %none_3275 = torch.constant.none %cpu_3276 = torch.constant.device "cpu" %false_3277 = torch.constant.bool false %2951 = torch.aten.arange.start_step %int0_3271, %int128_3272, %int2_3273, %none_3274, %none_3275, %cpu_3276, %false_3277 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_3278 = torch.constant.int 0 %int0_3279 = torch.constant.int 0 %int64_3280 = torch.constant.int 64 %int1_3281 = torch.constant.int 1 %2952 = torch.aten.slice.Tensor %2951, %int0_3278, %int0_3279, %int64_3280, %int1_3281 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_3282 = torch.constant.int 6 %2953 = torch.prims.convert_element_type %2952, %int6_3282 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_3283 = torch.constant.int 128 %2954 = torch.aten.div.Scalar %2953, %int128_3283 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_3284 = torch.constant.float 5.000000e+05 %2955 = torch.aten.pow.Scalar %float5.000000e05_3284, %2954 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %2956 = torch.aten.reciprocal %2955 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_3285 = torch.constant.float 1.000000e+00 %2957 = torch.aten.mul.Scalar %2956, %float1.000000e00_3285 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_3286 = torch.constant.int 131072 %int1_3287 = torch.constant.int 1 %2958 = torch.prim.ListConstruct %int131072_3286, %int1_3287 : (!torch.int, !torch.int) -> !torch.list<int> %2959 = torch.aten.view %2950, %2958 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %2960 = torch.aten.mul.Tensor %2959, %2957 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %2961 = torch.aten.cos %2960 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2962 = torch.aten.sin %2960 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %2963 = torch.aten.complex %2961, %2962 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_3288 = torch.constant.int 1 %2964 = torch.aten.size.int %2907, %int1_3288 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_3289 = torch.constant.int 0 %2965 = torch.aten.add.int %int0_3289, %2964 : !torch.int, !torch.int -> !torch.int %int0_3290 = torch.constant.int 0 %int0_3291 = torch.constant.int 0 %int1_3292 = torch.constant.int 1 %2966 = torch.aten.slice.Tensor %2963, %int0_3290, %int0_3291, %2965, %int1_3292 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2966, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_3293 = torch.constant.int 1 %int0_3294 = torch.constant.int 0 %int9223372036854775807_3295 = torch.constant.int 9223372036854775807 %int1_3296 = torch.constant.int 1 %2967 = torch.aten.slice.Tensor %2966, %int1_3293, %int0_3294, %int9223372036854775807_3295, %int1_3296 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> 
!torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %2967, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_3297 = torch.constant.int 0 %2968 = torch.aten.unsqueeze %2967, %int0_3297 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %2968, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_3298 = torch.constant.int 2 %2969 = torch.aten.unsqueeze %2968, %int2_3298 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2969, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_3299 = torch.constant.int 3 %int0_3300 = torch.constant.int 0 %int9223372036854775807_3301 = torch.constant.int 9223372036854775807 %int1_3302 = torch.constant.int 1 %2970 = torch.aten.slice.Tensor %2969, %int3_3299, %int0_3300, %int9223372036854775807_3301, %int1_3302 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %2970, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %2971 = torch_c.to_builtin_tensor %2918 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_3303 = arith.constant 1 : index %dim_3304 = tensor.dim %2971, %c1_3303 : tensor<4x?x8x128xf16> %2972 = flow.tensor.bitcast %2971 : tensor<4x?x8x128xf16>{%dim_3304} -> tensor<4x?x8x64xcomplex<f16>>{%dim_3304} %2973 = torch_c.from_builtin_tensor %2972 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %2973, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %2974 = torch.aten.mul.Tensor %2973, %2970 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %2974, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %2975 = torch_c.to_builtin_tensor %2974 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_3305 = arith.constant 1 : index %dim_3306 = tensor.dim %2975, %c1_3305 : tensor<4x?x8x64xcomplex<f32>> %2976 = flow.tensor.bitcast %2975 : tensor<4x?x8x64xcomplex<f32>>{%dim_3306} -> tensor<4x?x8x128xf32>{%dim_3306} %2977 = torch_c.from_builtin_tensor %2976 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %2977, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_3307 = torch.constant.int 5 %2978 = torch.prims.convert_element_type %2977, %int5_3307 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %2978, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_3308 = torch.constant.int 64 %2979 = torch.aten.mul.Scalar %arg2, %int64_3308 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2979, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int26 = torch.constant.int 26 %int1_3309 = torch.constant.int 1 %2980 = torch.aten.add.Scalar %2979, %int26, %int1_3309 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %2980, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_3310 = 
torch.constant.int 4 %int32_3311 = torch.constant.int 32 %int8_3312 = torch.constant.int 8 %int128_3313 = torch.constant.int 128 %2981 = torch.prim.ListConstruct %int4_3310, %425, %int32_3311, %int8_3312, %int128_3313 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2982 = torch.aten.view %2978, %2981 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %2982, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_3314 = torch.constant.int 4 %2983 = torch.aten.mul.int %int4_3314, %425 : !torch.int, !torch.int -> !torch.int %int32_3315 = torch.constant.int 32 %int8_3316 = torch.constant.int 8 %int128_3317 = torch.constant.int 128 %2984 = torch.prim.ListConstruct %2983, %int32_3315, %int8_3316, %int128_3317 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2985 = torch.aten.view %2982, %2984 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2985, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_3318 = torch.constant.int 4 %2986 = torch.aten.mul.int %int4_3318, %425 : !torch.int, !torch.int -> !torch.int %2987 = torch.prim.ListConstruct %2986 : (!torch.int) -> !torch.list<int> %2988 = torch.aten.view %2980, %2987 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %2988, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_3319 = torch.constant.int 32 %int2_3320 = torch.constant.int 2 %int32_3321 = torch.constant.int 32 %int8_3322 = torch.constant.int 8 %int128_3323 = torch.constant.int 128 %2989 = torch.prim.ListConstruct %416, %int32_3319, %int2_3320, %int32_3321, %int8_3322, %int128_3323 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2990 = torch.aten.view %2822, %2989 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2990, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_3324 = torch.constant.int 32 %2991 = torch.aten.mul.int %416, %int32_3324 : !torch.int, !torch.int -> !torch.int %int2_3325 = torch.constant.int 2 %2992 = torch.aten.mul.int %2991, %int2_3325 : !torch.int, !torch.int -> !torch.int %int32_3326 = torch.constant.int 32 %int8_3327 = torch.constant.int 8 %int128_3328 = torch.constant.int 128 %2993 = torch.prim.ListConstruct %2992, %int32_3326, %int8_3327, %int128_3328 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2994 = torch.aten.view %2990, %2993 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2994, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %2995 = torch.prim.ListConstruct %2988 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_3329 = torch.constant.bool false %2996 = torch.aten.index_put %2994, %2995, %2985, %false_3329 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %2996, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_3330 = torch.constant.int 32 %int2_3331 = torch.constant.int 2 %int32_3332 = torch.constant.int 32 %int8_3333 = 
torch.constant.int 8 %int128_3334 = torch.constant.int 128 %2997 = torch.prim.ListConstruct %416, %int32_3330, %int2_3331, %int32_3332, %int8_3333, %int128_3334 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %2998 = torch.aten.view %2996, %2997 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %2998, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_3335 = torch.constant.int 2097152 %2999 = torch.prim.ListConstruct %416, %int2097152_3335 : (!torch.int, !torch.int) -> !torch.list<int> %3000 = torch.aten.view %2998, %2999 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3000, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_3336 = torch.constant.int 32 %int2_3337 = torch.constant.int 2 %int32_3338 = torch.constant.int 32 %int8_3339 = torch.constant.int 8 %int128_3340 = torch.constant.int 128 %3001 = torch.prim.ListConstruct %416, %int32_3336, %int2_3337, %int32_3338, %int8_3339, %int128_3340 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3002 = torch.aten.view %3000, %3001 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3002, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_3341 = torch.constant.int 32 %int8_3342 = torch.constant.int 8 %int128_3343 = torch.constant.int 128 %3003 = torch.prim.ListConstruct %2992, %int32_3341, %int8_3342, %int128_3343 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3004 = torch.aten.view %3002, %3003 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3004, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_3344 = torch.constant.int 4 %int32_3345 = torch.constant.int 32 %int8_3346 = torch.constant.int 8 %int128_3347 = torch.constant.int 128 %3005 = torch.prim.ListConstruct %int4_3344, %425, %int32_3345, %int8_3346, %int128_3347 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3006 = torch.aten.view %2920, %3005 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3006, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_3348 = torch.constant.int 4 %3007 = torch.aten.mul.int %int4_3348, %425 : !torch.int, !torch.int -> !torch.int %int32_3349 = torch.constant.int 32 %int8_3350 = torch.constant.int 8 %int128_3351 = torch.constant.int 128 %3008 = torch.prim.ListConstruct %3007, %int32_3349, %int8_3350, %int128_3351 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3009 = torch.aten.view %3006, %3008 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3009, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_3352 = torch.constant.int 1 %int1_3353 = torch.constant.int 1 %3010 = torch.aten.add.Scalar %2980, %int1_3352, %int1_3353 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3010, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_3354 = 
torch.constant.int 4 %3011 = torch.aten.mul.int %int4_3354, %425 : !torch.int, !torch.int -> !torch.int %3012 = torch.prim.ListConstruct %3011 : (!torch.int) -> !torch.list<int> %3013 = torch.aten.view %3010, %3012 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3013, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %3014 = torch.prim.ListConstruct %3013 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_3355 = torch.constant.bool false %3015 = torch.aten.index_put %3004, %3014, %3009, %false_3355 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3015, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_3356 = torch.constant.int 32 %int2_3357 = torch.constant.int 2 %int32_3358 = torch.constant.int 32 %int8_3359 = torch.constant.int 8 %int128_3360 = torch.constant.int 128 %3016 = torch.prim.ListConstruct %416, %int32_3356, %int2_3357, %int32_3358, %int8_3359, %int128_3360 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3017 = torch.aten.view %3015, %3016 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3017, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_3361 = torch.constant.int 2097152 %3018 = torch.prim.ListConstruct %416, %int2097152_3361 : (!torch.int, !torch.int) -> !torch.list<int> %3019 = torch.aten.view %3017, %3018 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3019, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_3362 = torch.constant.int -2 %3020 = torch.aten.unsqueeze %2978, %int-2_3362 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %3020, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_3363 = torch.constant.int 4 %int8_3364 = torch.constant.int 8 %int4_3365 = torch.constant.int 4 %int128_3366 = torch.constant.int 128 %3021 = torch.prim.ListConstruct %int4_3363, %2964, %int8_3364, %int4_3365, %int128_3366 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_3367 = torch.constant.bool false %3022 = torch.aten.expand %3020, %3021, %false_3367 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3022, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_3368 = torch.constant.int 0 %3023 = torch.aten.clone %3022, %int0_3368 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3023, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_3369 = torch.constant.int 4 %int32_3370 = torch.constant.int 32 %int128_3371 = torch.constant.int 128 %3024 = torch.prim.ListConstruct %int4_3369, %2964, %int32_3370, %int128_3371 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3025 = torch.aten._unsafe_view %3023, %3024 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3025, [%292], affine_map<()[s0] -> (4, s0 * 
32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_3372 = torch.constant.int -2 %3026 = torch.aten.unsqueeze %2920, %int-2_3372 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %3026, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_3373 = torch.constant.int 1 %3027 = torch.aten.size.int %2914, %int1_3373 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_3374 = torch.constant.int 4 %int8_3375 = torch.constant.int 8 %int4_3376 = torch.constant.int 4 %int128_3377 = torch.constant.int 128 %3028 = torch.prim.ListConstruct %int4_3374, %3027, %int8_3375, %int4_3376, %int128_3377 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_3378 = torch.constant.bool false %3029 = torch.aten.expand %3026, %3028, %false_3378 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3029, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_3379 = torch.constant.int 0 %3030 = torch.aten.clone %3029, %int0_3379 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3030, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_3380 = torch.constant.int 4 %int32_3381 = torch.constant.int 32 %int128_3382 = torch.constant.int 128 %3031 = torch.prim.ListConstruct %int4_3380, %3027, %int32_3381, %int128_3382 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3032 = torch.aten._unsafe_view %3030, %3031 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3032, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_3383 = torch.constant.int 1 %int2_3384 = torch.constant.int 2 %3033 = torch.aten.transpose.int %2949, %int1_3383, %int2_3384 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3033, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3385 = torch.constant.int 1 %int2_3386 = torch.constant.int 2 %3034 = torch.aten.transpose.int %3025, %int1_3385, %int2_3386 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3034, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3387 = torch.constant.int 1 %int2_3388 = torch.constant.int 2 %3035 = torch.aten.transpose.int %3032, %int1_3387, %int2_3388 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3035, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_3389 = torch.constant.float 0.000000e+00 %false_3390 = torch.constant.bool false %none_3391 = torch.constant.none %3036:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3033, %3034, %3035, %float0.000000e00_3389, %false_3390, %320, %none_3391) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %3036#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 
32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3392 = torch.constant.int 1 %int2_3393 = torch.constant.int 2 %3037 = torch.aten.transpose.int %3036#0, %int1_3392, %int2_3393 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3037, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_3394 = torch.constant.int 4 %int4096_3395 = torch.constant.int 4096 %3038 = torch.prim.ListConstruct %int4_3394, %2935, %int4096_3395 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3039 = torch.aten.view %3037, %3038 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3039, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3396 = torch.constant.int -2 %int-1_3397 = torch.constant.int -1 %3040 = torch.aten.transpose.int %122, %int-2_3396, %int-1_3397 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_3398 = torch.constant.int 4 %3041 = torch.aten.mul.int %int4_3398, %2935 : !torch.int, !torch.int -> !torch.int %int4096_3399 = torch.constant.int 4096 %3042 = torch.prim.ListConstruct %3041, %int4096_3399 : (!torch.int, !torch.int) -> !torch.list<int> %3043 = torch.aten.view %3039, %3042 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3043, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3044 = torch.aten.mm %3043, %3040 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3044, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3400 = torch.constant.int 4 %int4096_3401 = torch.constant.int 4096 %3045 = torch.prim.ListConstruct %int4_3400, %2935, %int4096_3401 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3046 = torch.aten.view %3044, %3045 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3046, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_3402 = torch.constant.int 1 %3047 = torch.aten.add.Tensor %2884, %3046, %int1_3402 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3047, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_3403 = torch.constant.int 6 %3048 = torch.prims.convert_element_type %3047, %int6_3403 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3048, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_3404 = torch.constant.int 2 %3049 = torch.aten.pow.Tensor_Scalar %3048, %int2_3404 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3049, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_3405 = torch.constant.int -1 %3050 = torch.prim.ListConstruct %int-1_3405 : (!torch.int) -> !torch.list<int> %true_3406 = torch.constant.bool true %none_3407 = torch.constant.none %3051 = torch.aten.mean.dim %3049, %3050, %true_3406, %none_3407 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3051, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : 
!torch.vtensor<[4,?,1],f32> %float9.999990e-06_3408 = torch.constant.float 9.9999997473787516E-6 %int1_3409 = torch.constant.int 1 %3052 = torch.aten.add.Scalar %3051, %float9.999990e-06_3408, %int1_3409 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3052, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3053 = torch.aten.rsqrt %3052 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3053, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3054 = torch.aten.mul.Tensor %3048, %3053 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3054, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3055 = torch.aten.mul.Tensor %123, %3054 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3055, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_3410 = torch.constant.int 5 %3056 = torch.prims.convert_element_type %3055, %int5_3410 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3056, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3411 = torch.constant.int -2 %int-1_3412 = torch.constant.int -1 %3057 = torch.aten.transpose.int %124, %int-2_3411, %int-1_3412 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_3413 = torch.constant.int 4 %3058 = torch.aten.mul.int %int4_3413, %294 : !torch.int, !torch.int -> !torch.int %int4096_3414 = torch.constant.int 4096 %3059 = torch.prim.ListConstruct %3058, %int4096_3414 : (!torch.int, !torch.int) -> !torch.list<int> %3060 = torch.aten.view %3056, %3059 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3060, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3061 = torch.aten.mm %3060, %3057 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3061, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_3415 = torch.constant.int 4 %int14336_3416 = torch.constant.int 14336 %3062 = torch.prim.ListConstruct %int4_3415, %294, %int14336_3416 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3063 = torch.aten.view %3061, %3062 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3063, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3064 = torch.aten.silu %3063 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3064, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_3417 = torch.constant.int -2 %int-1_3418 = torch.constant.int -1 %3065 = torch.aten.transpose.int %125, %int-2_3417, %int-1_3418 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_3419 = torch.constant.int 4 %3066 = torch.aten.mul.int %int4_3419, %294 : !torch.int, !torch.int -> !torch.int %int4096_3420 = torch.constant.int 4096 %3067 = torch.prim.ListConstruct %3066, %int4096_3420 : (!torch.int, !torch.int) -> !torch.list<int> %3068 = torch.aten.view %3056, %3067 : 
!torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3068, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3069 = torch.aten.mm %3068, %3065 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3069, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_3421 = torch.constant.int 4 %int14336_3422 = torch.constant.int 14336 %3070 = torch.prim.ListConstruct %int4_3421, %294, %int14336_3422 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3071 = torch.aten.view %3069, %3070 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3071, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3072 = torch.aten.mul.Tensor %3064, %3071 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3072, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_3423 = torch.constant.int -2 %int-1_3424 = torch.constant.int -1 %3073 = torch.aten.transpose.int %126, %int-2_3423, %int-1_3424 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_3425 = torch.constant.int 1 %3074 = torch.aten.size.int %3063, %int1_3425 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_3426 = torch.constant.int 4 %3075 = torch.aten.mul.int %int4_3426, %3074 : !torch.int, !torch.int -> !torch.int %int14336_3427 = torch.constant.int 14336 %3076 = torch.prim.ListConstruct %3075, %int14336_3427 : (!torch.int, !torch.int) -> !torch.list<int> %3077 = torch.aten.view %3072, %3076 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3077, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %3078 = torch.aten.mm %3077, %3073 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3078, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3428 = torch.constant.int 4 %int4096_3429 = torch.constant.int 4096 %3079 = torch.prim.ListConstruct %int4_3428, %3074, %int4096_3429 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3080 = torch.aten.view %3078, %3079 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3080, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_3430 = torch.constant.int 1 %3081 = torch.aten.add.Tensor %3047, %3080, %int1_3430 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3081, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_3431 = torch.constant.int 6 %3082 = torch.prims.convert_element_type %3081, %int6_3431 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3082, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_3432 = torch.constant.int 2 %3083 = torch.aten.pow.Tensor_Scalar %3082, %int2_3432 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3083, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : 
!torch.vtensor<[4,?,4096],f32> %int-1_3433 = torch.constant.int -1 %3084 = torch.prim.ListConstruct %int-1_3433 : (!torch.int) -> !torch.list<int> %true_3434 = torch.constant.bool true %none_3435 = torch.constant.none %3085 = torch.aten.mean.dim %3083, %3084, %true_3434, %none_3435 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3085, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_3436 = torch.constant.float 9.9999997473787516E-6 %int1_3437 = torch.constant.int 1 %3086 = torch.aten.add.Scalar %3085, %float9.999990e-06_3436, %int1_3437 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3086, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3087 = torch.aten.rsqrt %3086 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3087, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3088 = torch.aten.mul.Tensor %3082, %3087 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3088, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3089 = torch.aten.mul.Tensor %127, %3088 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3089, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_3438 = torch.constant.int 5 %3090 = torch.prims.convert_element_type %3089, %int5_3438 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3090, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3439 = torch.constant.int -2 %int-1_3440 = torch.constant.int -1 %3091 = torch.aten.transpose.int %128, %int-2_3439, %int-1_3440 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_3441 = torch.constant.int 4 %3092 = torch.aten.mul.int %int4_3441, %294 : !torch.int, !torch.int -> !torch.int %int4096_3442 = torch.constant.int 4096 %3093 = torch.prim.ListConstruct %3092, %int4096_3442 : (!torch.int, !torch.int) -> !torch.list<int> %3094 = torch.aten.view %3090, %3093 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3094, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3095 = torch.aten.mm %3094, %3091 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3095, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3443 = torch.constant.int 4 %int4096_3444 = torch.constant.int 4096 %3096 = torch.prim.ListConstruct %int4_3443, %294, %int4096_3444 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3097 = torch.aten.view %3095, %3096 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3097, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3445 = torch.constant.int -2 %int-1_3446 = torch.constant.int -1 %3098 = torch.aten.transpose.int %129, %int-2_3445, %int-1_3446 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_3447 = torch.constant.int 4 %3099 = torch.aten.mul.int 
%int4_3447, %294 : !torch.int, !torch.int -> !torch.int %int4096_3448 = torch.constant.int 4096 %3100 = torch.prim.ListConstruct %3099, %int4096_3448 : (!torch.int, !torch.int) -> !torch.list<int> %3101 = torch.aten.view %3090, %3100 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3101, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3102 = torch.aten.mm %3101, %3098 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3102, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_3449 = torch.constant.int 4 %int1024_3450 = torch.constant.int 1024 %3103 = torch.prim.ListConstruct %int4_3449, %294, %int1024_3450 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3104 = torch.aten.view %3102, %3103 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3104, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_3451 = torch.constant.int -2 %int-1_3452 = torch.constant.int -1 %3105 = torch.aten.transpose.int %130, %int-2_3451, %int-1_3452 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_3453 = torch.constant.int 4 %3106 = torch.aten.mul.int %int4_3453, %294 : !torch.int, !torch.int -> !torch.int %int4096_3454 = torch.constant.int 4096 %3107 = torch.prim.ListConstruct %3106, %int4096_3454 : (!torch.int, !torch.int) -> !torch.list<int> %3108 = torch.aten.view %3090, %3107 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3108, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3109 = torch.aten.mm %3108, %3105 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3109, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_3455 = torch.constant.int 4 %int1024_3456 = torch.constant.int 1024 %3110 = torch.prim.ListConstruct %int4_3455, %294, %int1024_3456 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3111 = torch.aten.view %3109, %3110 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3111, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_3457 = torch.constant.int 4 %int32_3458 = torch.constant.int 32 %int128_3459 = torch.constant.int 128 %3112 = torch.prim.ListConstruct %int4_3457, %294, %int32_3458, %int128_3459 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3113 = torch.aten.view %3097, %3112 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3113, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_3460 = torch.constant.int 4 %int8_3461 = torch.constant.int 8 %int128_3462 = torch.constant.int 128 %3114 = torch.prim.ListConstruct %int4_3460, %294, %int8_3461, %int128_3462 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3115 = torch.aten.view %3104, %3114 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3115, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_3463 = torch.constant.int 4 %int8_3464 = 
torch.constant.int 8 %int128_3465 = torch.constant.int 128 %3116 = torch.prim.ListConstruct %int4_3463, %294, %int8_3464, %int128_3465 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3117 = torch.aten.view %3111, %3116 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3117, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_3466 = torch.constant.int 131072 %none_3467 = torch.constant.none %none_3468 = torch.constant.none %cpu_3469 = torch.constant.device "cpu" %false_3470 = torch.constant.bool false %3118 = torch.aten.arange %int131072_3466, %none_3467, %none_3468, %cpu_3469, %false_3470 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_3471 = torch.constant.int 0 %int128_3472 = torch.constant.int 128 %int2_3473 = torch.constant.int 2 %none_3474 = torch.constant.none %none_3475 = torch.constant.none %cpu_3476 = torch.constant.device "cpu" %false_3477 = torch.constant.bool false %3119 = torch.aten.arange.start_step %int0_3471, %int128_3472, %int2_3473, %none_3474, %none_3475, %cpu_3476, %false_3477 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_3478 = torch.constant.int 0 %int0_3479 = torch.constant.int 0 %int64_3480 = torch.constant.int 64 %int1_3481 = torch.constant.int 1 %3120 = torch.aten.slice.Tensor %3119, %int0_3478, %int0_3479, %int64_3480, %int1_3481 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_3482 = torch.constant.int 6 %3121 = torch.prims.convert_element_type %3120, %int6_3482 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_3483 = torch.constant.int 128 %3122 = torch.aten.div.Scalar %3121, %int128_3483 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_3484 = torch.constant.float 5.000000e+05 %3123 = torch.aten.pow.Scalar %float5.000000e05_3484, %3122 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3124 = torch.aten.reciprocal %3123 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_3485 = torch.constant.float 1.000000e+00 %3125 = torch.aten.mul.Scalar %3124, %float1.000000e00_3485 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_3486 = torch.constant.int 131072 %int1_3487 = torch.constant.int 1 %3126 = torch.prim.ListConstruct %int131072_3486, %int1_3487 : (!torch.int, !torch.int) -> !torch.list<int> %3127 = torch.aten.view %3118, %3126 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3128 = torch.aten.mul.Tensor %3127, %3125 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %3129 = torch.aten.cos %3128 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3130 = torch.aten.sin %3128 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3131 = torch.aten.complex %3129, %3130 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_3488 = torch.constant.int 1 %3132 = torch.aten.size.int %3097, %int1_3488 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_3489 = torch.constant.int 0 %3133 = torch.aten.add.int %int0_3489, %3132 : !torch.int, !torch.int -> !torch.int %int0_3490 = torch.constant.int 0 %int0_3491 = torch.constant.int 0 
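// ---- Editorial annotation (comment added for readability; not emitted by the compiler) ----
// The arange / div / pow / reciprocal / cos / sin / complex chain above rebuilds the RoPE angle
// table: angles[p, j] = p * 500000^(-2j/128) for positions p < 131072 and j < 64. Below it is
// sliced to the current sequence length, broadcast to [1, seq, 1, 64], and multiplied (as
// complex<f32>) into the bitcast query activations to apply the rotary embedding.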
%int1_3492 = torch.constant.int 1 %3134 = torch.aten.slice.Tensor %3131, %int0_3490, %int0_3491, %3133, %int1_3492 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3134, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_3493 = torch.constant.int 1 %int0_3494 = torch.constant.int 0 %int9223372036854775807_3495 = torch.constant.int 9223372036854775807 %int1_3496 = torch.constant.int 1 %3135 = torch.aten.slice.Tensor %3134, %int1_3493, %int0_3494, %int9223372036854775807_3495, %int1_3496 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3135, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_3497 = torch.constant.int 0 %3136 = torch.aten.unsqueeze %3135, %int0_3497 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3136, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_3498 = torch.constant.int 2 %3137 = torch.aten.unsqueeze %3136, %int2_3498 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3137, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_3499 = torch.constant.int 3 %int0_3500 = torch.constant.int 0 %int9223372036854775807_3501 = torch.constant.int 9223372036854775807 %int1_3502 = torch.constant.int 1 %3138 = torch.aten.slice.Tensor %3137, %int3_3499, %int0_3500, %int9223372036854775807_3501, %int1_3502 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3138, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3139 = torch_c.to_builtin_tensor %3113 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_3503 = arith.constant 1 : index %dim_3504 = tensor.dim %3139, %c1_3503 : tensor<4x?x32x128xf16> %3140 = flow.tensor.bitcast %3139 : tensor<4x?x32x128xf16>{%dim_3504} -> tensor<4x?x32x64xcomplex<f16>>{%dim_3504} %3141 = torch_c.from_builtin_tensor %3140 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %3141, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %3142 = torch.aten.mul.Tensor %3141, %3138 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %3142, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %3143 = torch_c.to_builtin_tensor %3142 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_3505 = arith.constant 1 : index %dim_3506 = tensor.dim %3143, %c1_3505 : tensor<4x?x32x64xcomplex<f32>> %3144 = flow.tensor.bitcast %3143 : tensor<4x?x32x64xcomplex<f32>>{%dim_3506} -> tensor<4x?x32x128xf32>{%dim_3506} %3145 = torch_c.from_builtin_tensor %3144 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %3145, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_3507 = torch.constant.int 5 %3146 = torch.prims.convert_element_type %3145, %int5_3507 : !torch.vtensor<[4,?,32,128],f32>, 
!torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3146, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_3508 = torch.constant.int 131072 %none_3509 = torch.constant.none %none_3510 = torch.constant.none %cpu_3511 = torch.constant.device "cpu" %false_3512 = torch.constant.bool false %3147 = torch.aten.arange %int131072_3508, %none_3509, %none_3510, %cpu_3511, %false_3512 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_3513 = torch.constant.int 0 %int128_3514 = torch.constant.int 128 %int2_3515 = torch.constant.int 2 %none_3516 = torch.constant.none %none_3517 = torch.constant.none %cpu_3518 = torch.constant.device "cpu" %false_3519 = torch.constant.bool false %3148 = torch.aten.arange.start_step %int0_3513, %int128_3514, %int2_3515, %none_3516, %none_3517, %cpu_3518, %false_3519 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_3520 = torch.constant.int 0 %int0_3521 = torch.constant.int 0 %int64_3522 = torch.constant.int 64 %int1_3523 = torch.constant.int 1 %3149 = torch.aten.slice.Tensor %3148, %int0_3520, %int0_3521, %int64_3522, %int1_3523 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_3524 = torch.constant.int 6 %3150 = torch.prims.convert_element_type %3149, %int6_3524 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_3525 = torch.constant.int 128 %3151 = torch.aten.div.Scalar %3150, %int128_3525 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_3526 = torch.constant.float 5.000000e+05 %3152 = torch.aten.pow.Scalar %float5.000000e05_3526, %3151 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3153 = torch.aten.reciprocal %3152 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_3527 = torch.constant.float 1.000000e+00 %3154 = torch.aten.mul.Scalar %3153, %float1.000000e00_3527 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_3528 = torch.constant.int 131072 %int1_3529 = torch.constant.int 1 %3155 = torch.prim.ListConstruct %int131072_3528, %int1_3529 : (!torch.int, !torch.int) -> !torch.list<int> %3156 = torch.aten.view %3147, %3155 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3157 = torch.aten.mul.Tensor %3156, %3154 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %3158 = torch.aten.cos %3157 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3159 = torch.aten.sin %3157 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3160 = torch.aten.complex %3158, %3159 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_3530 = torch.constant.int 1 %3161 = torch.aten.size.int %3104, %int1_3530 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_3531 = torch.constant.int 0 %3162 = torch.aten.add.int %int0_3531, %3161 : !torch.int, !torch.int -> !torch.int %int0_3532 = torch.constant.int 0 %int0_3533 = torch.constant.int 0 %int1_3534 = torch.constant.int 1 %3163 = torch.aten.slice.Tensor %3160, %int0_3532, %int0_3533, %3162, %int1_3534 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3163, 
[%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_3535 = torch.constant.int 1 %int0_3536 = torch.constant.int 0 %int9223372036854775807_3537 = torch.constant.int 9223372036854775807 %int1_3538 = torch.constant.int 1 %3164 = torch.aten.slice.Tensor %3163, %int1_3535, %int0_3536, %int9223372036854775807_3537, %int1_3538 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3164, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_3539 = torch.constant.int 0 %3165 = torch.aten.unsqueeze %3164, %int0_3539 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3165, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_3540 = torch.constant.int 2 %3166 = torch.aten.unsqueeze %3165, %int2_3540 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3166, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_3541 = torch.constant.int 3 %int0_3542 = torch.constant.int 0 %int9223372036854775807_3543 = torch.constant.int 9223372036854775807 %int1_3544 = torch.constant.int 1 %3167 = torch.aten.slice.Tensor %3166, %int3_3541, %int0_3542, %int9223372036854775807_3543, %int1_3544 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3167, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3168 = torch_c.to_builtin_tensor %3115 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_3545 = arith.constant 1 : index %dim_3546 = tensor.dim %3168, %c1_3545 : tensor<4x?x8x128xf16> %3169 = flow.tensor.bitcast %3168 : tensor<4x?x8x128xf16>{%dim_3546} -> tensor<4x?x8x64xcomplex<f16>>{%dim_3546} %3170 = torch_c.from_builtin_tensor %3169 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %3170, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %3171 = torch.aten.mul.Tensor %3170, %3167 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %3171, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %3172 = torch_c.to_builtin_tensor %3171 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_3547 = arith.constant 1 : index %dim_3548 = tensor.dim %3172, %c1_3547 : tensor<4x?x8x64xcomplex<f32>> %3173 = flow.tensor.bitcast %3172 : tensor<4x?x8x64xcomplex<f32>>{%dim_3548} -> tensor<4x?x8x128xf32>{%dim_3548} %3174 = torch_c.from_builtin_tensor %3173 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %3174, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_3549 = torch.constant.int 5 %3175 = torch.prims.convert_element_type %3174, %int5_3549 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3175, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_3550 = torch.constant.int 64 %3176 = torch.aten.mul.Scalar %arg2, %int64_3550 : !torch.vtensor<[4,?],si64>, !torch.int -> 
!torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3176, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int28 = torch.constant.int 28 %int1_3551 = torch.constant.int 1 %3177 = torch.aten.add.Scalar %3176, %int28, %int1_3551 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3177, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_3552 = torch.constant.int 4 %int32_3553 = torch.constant.int 32 %int8_3554 = torch.constant.int 8 %int128_3555 = torch.constant.int 128 %3178 = torch.prim.ListConstruct %int4_3552, %425, %int32_3553, %int8_3554, %int128_3555 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3179 = torch.aten.view %3175, %3178 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3179, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_3556 = torch.constant.int 4 %3180 = torch.aten.mul.int %int4_3556, %425 : !torch.int, !torch.int -> !torch.int %int32_3557 = torch.constant.int 32 %int8_3558 = torch.constant.int 8 %int128_3559 = torch.constant.int 128 %3181 = torch.prim.ListConstruct %3180, %int32_3557, %int8_3558, %int128_3559 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3182 = torch.aten.view %3179, %3181 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3182, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_3560 = torch.constant.int 4 %3183 = torch.aten.mul.int %int4_3560, %425 : !torch.int, !torch.int -> !torch.int %3184 = torch.prim.ListConstruct %3183 : (!torch.int) -> !torch.list<int> %3185 = torch.aten.view %3177, %3184 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3185, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_3561 = torch.constant.int 32 %int2_3562 = torch.constant.int 2 %int32_3563 = torch.constant.int 32 %int8_3564 = torch.constant.int 8 %int128_3565 = torch.constant.int 128 %3186 = torch.prim.ListConstruct %416, %int32_3561, %int2_3562, %int32_3563, %int8_3564, %int128_3565 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3187 = torch.aten.view %3019, %3186 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3187, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_3566 = torch.constant.int 32 %3188 = torch.aten.mul.int %416, %int32_3566 : !torch.int, !torch.int -> !torch.int %int2_3567 = torch.constant.int 2 %3189 = torch.aten.mul.int %3188, %int2_3567 : !torch.int, !torch.int -> !torch.int %int32_3568 = torch.constant.int 32 %int8_3569 = torch.constant.int 8 %int128_3570 = torch.constant.int 128 %3190 = torch.prim.ListConstruct %3189, %int32_3568, %int8_3569, %int128_3570 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3191 = torch.aten.view %3187, %3190 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3191, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %3192 = torch.prim.ListConstruct %3185 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_3571 = torch.constant.bool false %3193 = 
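// Cache scatter: the flat [?, 2097152] cache (%3019) is viewed as [pages, 32, 2, 32, 8, 128],
// collapsed to rows of [32, 8, 128], and index_put writes the rotated K rows at indices %3185.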
torch.aten.index_put %3191, %3192, %3182, %false_3571 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3193, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_3572 = torch.constant.int 32 %int2_3573 = torch.constant.int 2 %int32_3574 = torch.constant.int 32 %int8_3575 = torch.constant.int 8 %int128_3576 = torch.constant.int 128 %3194 = torch.prim.ListConstruct %416, %int32_3572, %int2_3573, %int32_3574, %int8_3575, %int128_3576 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3195 = torch.aten.view %3193, %3194 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3195, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_3577 = torch.constant.int 2097152 %3196 = torch.prim.ListConstruct %416, %int2097152_3577 : (!torch.int, !torch.int) -> !torch.list<int> %3197 = torch.aten.view %3195, %3196 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3197, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_3578 = torch.constant.int 32 %int2_3579 = torch.constant.int 2 %int32_3580 = torch.constant.int 32 %int8_3581 = torch.constant.int 8 %int128_3582 = torch.constant.int 128 %3198 = torch.prim.ListConstruct %416, %int32_3578, %int2_3579, %int32_3580, %int8_3581, %int128_3582 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3199 = torch.aten.view %3197, %3198 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3199, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_3583 = torch.constant.int 32 %int8_3584 = torch.constant.int 8 %int128_3585 = torch.constant.int 128 %3200 = torch.prim.ListConstruct %3189, %int32_3583, %int8_3584, %int128_3585 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3201 = torch.aten.view %3199, %3200 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3201, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_3586 = torch.constant.int 4 %int32_3587 = torch.constant.int 32 %int8_3588 = torch.constant.int 8 %int128_3589 = torch.constant.int 128 %3202 = torch.prim.ListConstruct %int4_3586, %425, %int32_3587, %int8_3588, %int128_3589 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3203 = torch.aten.view %3117, %3202 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3203, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_3590 = torch.constant.int 4 %3204 = torch.aten.mul.int %int4_3590, %425 : !torch.int, !torch.int -> !torch.int %int32_3591 = torch.constant.int 32 %int8_3592 = torch.constant.int 8 %int128_3593 = torch.constant.int 128 %3205 = torch.prim.ListConstruct %3204, %int32_3591, %int8_3592, %int128_3593 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3206 = torch.aten.view %3203, %3205 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> 
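// V takes the neighbouring slot: indices are bumped by +1 (%3207) and the same
// view / index_put / view round-trip lands the cache back at [?, 2097152] (%3216).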
torch.bind_symbolic_shape %3206, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_3594 = torch.constant.int 1 %int1_3595 = torch.constant.int 1 %3207 = torch.aten.add.Scalar %3177, %int1_3594, %int1_3595 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3207, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_3596 = torch.constant.int 4 %3208 = torch.aten.mul.int %int4_3596, %425 : !torch.int, !torch.int -> !torch.int %3209 = torch.prim.ListConstruct %3208 : (!torch.int) -> !torch.list<int> %3210 = torch.aten.view %3207, %3209 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3210, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %3211 = torch.prim.ListConstruct %3210 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_3597 = torch.constant.bool false %3212 = torch.aten.index_put %3201, %3211, %3206, %false_3597 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3212, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_3598 = torch.constant.int 32 %int2_3599 = torch.constant.int 2 %int32_3600 = torch.constant.int 32 %int8_3601 = torch.constant.int 8 %int128_3602 = torch.constant.int 128 %3213 = torch.prim.ListConstruct %416, %int32_3598, %int2_3599, %int32_3600, %int8_3601, %int128_3602 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3214 = torch.aten.view %3212, %3213 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3214, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_3603 = torch.constant.int 2097152 %3215 = torch.prim.ListConstruct %416, %int2097152_3603 : (!torch.int, !torch.int) -> !torch.list<int> %3216 = torch.aten.view %3214, %3215 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3216, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_3604 = torch.constant.int -2 %3217 = torch.aten.unsqueeze %3175, %int-2_3604 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %3217, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_3605 = torch.constant.int 4 %int8_3606 = torch.constant.int 8 %int4_3607 = torch.constant.int 4 %int128_3608 = torch.constant.int 128 %3218 = torch.prim.ListConstruct %int4_3605, %3161, %int8_3606, %int4_3607, %int128_3608 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_3609 = torch.constant.bool false %3219 = torch.aten.expand %3217, %3218, %false_3609 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3219, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_3610 = torch.constant.int 0 %3220 = torch.aten.clone %3219, %int0_3610 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3220, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_3611 = 
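// GQA repeat_kv: K is unsqueezed to [4, seq, 8, 1, 128], expanded x4 along the new axis,
// cloned contiguous, and _unsafe_view'd to [4, seq, 32, 128]; V (%3117) gets the same below.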
torch.constant.int 4 %int32_3612 = torch.constant.int 32 %int128_3613 = torch.constant.int 128 %3221 = torch.prim.ListConstruct %int4_3611, %3161, %int32_3612, %int128_3613 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3222 = torch.aten._unsafe_view %3220, %3221 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3222, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_3614 = torch.constant.int -2 %3223 = torch.aten.unsqueeze %3117, %int-2_3614 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %3223, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_3615 = torch.constant.int 1 %3224 = torch.aten.size.int %3111, %int1_3615 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_3616 = torch.constant.int 4 %int8_3617 = torch.constant.int 8 %int4_3618 = torch.constant.int 4 %int128_3619 = torch.constant.int 128 %3225 = torch.prim.ListConstruct %int4_3616, %3224, %int8_3617, %int4_3618, %int128_3619 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_3620 = torch.constant.bool false %3226 = torch.aten.expand %3223, %3225, %false_3620 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3226, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_3621 = torch.constant.int 0 %3227 = torch.aten.clone %3226, %int0_3621 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3227, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_3622 = torch.constant.int 4 %int32_3623 = torch.constant.int 32 %int128_3624 = torch.constant.int 128 %3228 = torch.prim.ListConstruct %int4_3622, %3224, %int32_3623, %int128_3624 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3229 = torch.aten._unsafe_view %3227, %3228 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3229, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_3625 = torch.constant.int 1 %int2_3626 = torch.constant.int 2 %3230 = torch.aten.transpose.int %3146, %int1_3625, %int2_3626 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3230, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3627 = torch.constant.int 1 %int2_3628 = torch.constant.int 2 %3231 = torch.aten.transpose.int %3222, %int1_3627, %int2_3628 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3231, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3629 = torch.constant.int 1 %int2_3630 = torch.constant.int 2 %3232 = torch.aten.transpose.int %3229, %int1_3629, %int2_3630 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3232, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_3631 = torch.constant.float 0.000000e+00 %false_3632 = torch.constant.bool false %none_3633 = torch.constant.none %3233:2 = torch.operator 
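// Fused attention: Q/K/V transposed to [4, 32, seq, 128]; dropout 0.0, is_causal = false,
// with the additive mask %320 ([4, 1, seq, seq] f16) supplied explicitly.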
"torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3230, %3231, %3232, %float0.000000e00_3631, %false_3632, %320, %none_3633) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %3233#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3634 = torch.constant.int 1 %int2_3635 = torch.constant.int 2 %3234 = torch.aten.transpose.int %3233#0, %int1_3634, %int2_3635 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3234, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_3636 = torch.constant.int 4 %int4096_3637 = torch.constant.int 4096 %3235 = torch.prim.ListConstruct %int4_3636, %3132, %int4096_3637 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3236 = torch.aten.view %3234, %3235 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3236, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3638 = torch.constant.int -2 %int-1_3639 = torch.constant.int -1 %3237 = torch.aten.transpose.int %131, %int-2_3638, %int-1_3639 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_3640 = torch.constant.int 4 %3238 = torch.aten.mul.int %int4_3640, %3132 : !torch.int, !torch.int -> !torch.int %int4096_3641 = torch.constant.int 4096 %3239 = torch.prim.ListConstruct %3238, %int4096_3641 : (!torch.int, !torch.int) -> !torch.list<int> %3240 = torch.aten.view %3236, %3239 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3240, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3241 = torch.aten.mm %3240, %3237 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3241, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3642 = torch.constant.int 4 %int4096_3643 = torch.constant.int 4096 %3242 = torch.prim.ListConstruct %int4_3642, %3132, %int4096_3643 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3243 = torch.aten.view %3241, %3242 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3243, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_3644 = torch.constant.int 1 %3244 = torch.aten.add.Tensor %3081, %3243, %int1_3644 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3244, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_3645 = torch.constant.int 6 %3245 = torch.prims.convert_element_type %3244, %int6_3645 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3245, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_3646 = torch.constant.int 2 %3246 = torch.aten.pow.Tensor_Scalar %3245, %int2_3646 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3246, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : 
!torch.vtensor<[4,?,4096],f32> %int-1_3647 = torch.constant.int -1 %3247 = torch.prim.ListConstruct %int-1_3647 : (!torch.int) -> !torch.list<int> %true_3648 = torch.constant.bool true %none_3649 = torch.constant.none %3248 = torch.aten.mean.dim %3246, %3247, %true_3648, %none_3649 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3248, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_3650 = torch.constant.float 9.9999997473787516E-6 %int1_3651 = torch.constant.int 1 %3249 = torch.aten.add.Scalar %3248, %float9.999990e-06_3650, %int1_3651 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3249, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3250 = torch.aten.rsqrt %3249 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3250, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3251 = torch.aten.mul.Tensor %3245, %3250 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3251, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3252 = torch.aten.mul.Tensor %132, %3251 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3252, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_3652 = torch.constant.int 5 %3253 = torch.prims.convert_element_type %3252, %int5_3652 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3253, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3653 = torch.constant.int -2 %int-1_3654 = torch.constant.int -1 %3254 = torch.aten.transpose.int %133, %int-2_3653, %int-1_3654 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_3655 = torch.constant.int 4 %3255 = torch.aten.mul.int %int4_3655, %294 : !torch.int, !torch.int -> !torch.int %int4096_3656 = torch.constant.int 4096 %3256 = torch.prim.ListConstruct %3255, %int4096_3656 : (!torch.int, !torch.int) -> !torch.list<int> %3257 = torch.aten.view %3253, %3256 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3257, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3258 = torch.aten.mm %3257, %3254 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3258, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_3657 = torch.constant.int 4 %int14336_3658 = torch.constant.int 14336 %3259 = torch.prim.ListConstruct %int4_3657, %294, %int14336_3658 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3260 = torch.aten.view %3258, %3259 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3260, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3261 = torch.aten.silu %3260 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3261, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_3659 = torch.constant.int -2 %int-1_3660 = 
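// SwiGLU FFN: silu(x * %133^T) computed above; the up projection (%134^T) and elementwise
// product follow, then %135^T maps 14336 back down to 4096.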
torch.constant.int -1 %3262 = torch.aten.transpose.int %134, %int-2_3659, %int-1_3660 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_3661 = torch.constant.int 4 %3263 = torch.aten.mul.int %int4_3661, %294 : !torch.int, !torch.int -> !torch.int %int4096_3662 = torch.constant.int 4096 %3264 = torch.prim.ListConstruct %3263, %int4096_3662 : (!torch.int, !torch.int) -> !torch.list<int> %3265 = torch.aten.view %3253, %3264 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3265, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3266 = torch.aten.mm %3265, %3262 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3266, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_3663 = torch.constant.int 4 %int14336_3664 = torch.constant.int 14336 %3267 = torch.prim.ListConstruct %int4_3663, %294, %int14336_3664 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3268 = torch.aten.view %3266, %3267 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3268, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3269 = torch.aten.mul.Tensor %3261, %3268 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3269, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_3665 = torch.constant.int -2 %int-1_3666 = torch.constant.int -1 %3270 = torch.aten.transpose.int %135, %int-2_3665, %int-1_3666 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_3667 = torch.constant.int 1 %3271 = torch.aten.size.int %3260, %int1_3667 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_3668 = torch.constant.int 4 %3272 = torch.aten.mul.int %int4_3668, %3271 : !torch.int, !torch.int -> !torch.int %int14336_3669 = torch.constant.int 14336 %3273 = torch.prim.ListConstruct %3272, %int14336_3669 : (!torch.int, !torch.int) -> !torch.list<int> %3274 = torch.aten.view %3269, %3273 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3274, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %3275 = torch.aten.mm %3274, %3270 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3275, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3670 = torch.constant.int 4 %int4096_3671 = torch.constant.int 4096 %3276 = torch.prim.ListConstruct %int4_3670, %3271, %int4096_3671 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3277 = torch.aten.view %3275, %3276 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3277, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_3672 = torch.constant.int 1 %3278 = torch.aten.add.Tensor %3244, %3277, %int1_3672 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3278, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_3673 = torch.constant.int 6 %3279 = torch.prims.convert_element_type 
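// The FFN residual (%3278) closes this block; the convert / pow / mean / rsqrt run below is
// the next block's input RMSNorm (weight %136).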
%3278, %int6_3673 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3279, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_3674 = torch.constant.int 2 %3280 = torch.aten.pow.Tensor_Scalar %3279, %int2_3674 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3280, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_3675 = torch.constant.int -1 %3281 = torch.prim.ListConstruct %int-1_3675 : (!torch.int) -> !torch.list<int> %true_3676 = torch.constant.bool true %none_3677 = torch.constant.none %3282 = torch.aten.mean.dim %3280, %3281, %true_3676, %none_3677 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3282, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_3678 = torch.constant.float 9.9999997473787516E-6 %int1_3679 = torch.constant.int 1 %3283 = torch.aten.add.Scalar %3282, %float9.999990e-06_3678, %int1_3679 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3283, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3284 = torch.aten.rsqrt %3283 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3284, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3285 = torch.aten.mul.Tensor %3279, %3284 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3285, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3286 = torch.aten.mul.Tensor %136, %3285 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3286, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_3680 = torch.constant.int 5 %3287 = torch.prims.convert_element_type %3286, %int5_3680 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3287, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3681 = torch.constant.int -2 %int-1_3682 = torch.constant.int -1 %3288 = torch.aten.transpose.int %137, %int-2_3681, %int-1_3682 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_3683 = torch.constant.int 4 %3289 = torch.aten.mul.int %int4_3683, %294 : !torch.int, !torch.int -> !torch.int %int4096_3684 = torch.constant.int 4096 %3290 = torch.prim.ListConstruct %3289, %int4096_3684 : (!torch.int, !torch.int) -> !torch.list<int> %3291 = torch.aten.view %3287, %3290 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3291, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3292 = torch.aten.mm %3291, %3288 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3292, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3685 = torch.constant.int 4 %int4096_3686 = torch.constant.int 4096 %3293 = torch.prim.ListConstruct %int4_3685, %294, %int4096_3686 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3294 = torch.aten.view %3292, %3293 : 
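// QKV projections: Q = x * %137^T (4096 -> 4096) just above; K = x * %138^T and
// V = x * %139^T (4096 -> 1024, i.e. 8 KV heads x 128) follow.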
!torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3294, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3687 = torch.constant.int -2 %int-1_3688 = torch.constant.int -1 %3295 = torch.aten.transpose.int %138, %int-2_3687, %int-1_3688 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_3689 = torch.constant.int 4 %3296 = torch.aten.mul.int %int4_3689, %294 : !torch.int, !torch.int -> !torch.int %int4096_3690 = torch.constant.int 4096 %3297 = torch.prim.ListConstruct %3296, %int4096_3690 : (!torch.int, !torch.int) -> !torch.list<int> %3298 = torch.aten.view %3287, %3297 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3298, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3299 = torch.aten.mm %3298, %3295 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3299, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_3691 = torch.constant.int 4 %int1024_3692 = torch.constant.int 1024 %3300 = torch.prim.ListConstruct %int4_3691, %294, %int1024_3692 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3301 = torch.aten.view %3299, %3300 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3301, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_3693 = torch.constant.int -2 %int-1_3694 = torch.constant.int -1 %3302 = torch.aten.transpose.int %139, %int-2_3693, %int-1_3694 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_3695 = torch.constant.int 4 %3303 = torch.aten.mul.int %int4_3695, %294 : !torch.int, !torch.int -> !torch.int %int4096_3696 = torch.constant.int 4096 %3304 = torch.prim.ListConstruct %3303, %int4096_3696 : (!torch.int, !torch.int) -> !torch.list<int> %3305 = torch.aten.view %3287, %3304 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3305, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3306 = torch.aten.mm %3305, %3302 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3306, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_3697 = torch.constant.int 4 %int1024_3698 = torch.constant.int 1024 %3307 = torch.prim.ListConstruct %int4_3697, %294, %int1024_3698 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3308 = torch.aten.view %3306, %3307 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3308, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_3699 = torch.constant.int 4 %int32_3700 = torch.constant.int 32 %int128_3701 = torch.constant.int 128 %3309 = torch.prim.ListConstruct %int4_3699, %294, %int32_3700, %int128_3701 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3310 = torch.aten.view %3294, %3309 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3310, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_3702 = torch.constant.int 4 %int8_3703 = torch.constant.int 8 
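// Per-head reshapes: Q -> [4, seq, 32, 128], K/V -> [4, seq, 8, 128]. The RoPE table is then
// rebuilt from scratch for this block (the arange/pow/cos/sin sequence is not reused).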
%int128_3704 = torch.constant.int 128 %3311 = torch.prim.ListConstruct %int4_3702, %294, %int8_3703, %int128_3704 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3312 = torch.aten.view %3301, %3311 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3312, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_3705 = torch.constant.int 4 %int8_3706 = torch.constant.int 8 %int128_3707 = torch.constant.int 128 %3313 = torch.prim.ListConstruct %int4_3705, %294, %int8_3706, %int128_3707 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3314 = torch.aten.view %3308, %3313 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3314, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_3708 = torch.constant.int 131072 %none_3709 = torch.constant.none %none_3710 = torch.constant.none %cpu_3711 = torch.constant.device "cpu" %false_3712 = torch.constant.bool false %3315 = torch.aten.arange %int131072_3708, %none_3709, %none_3710, %cpu_3711, %false_3712 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_3713 = torch.constant.int 0 %int128_3714 = torch.constant.int 128 %int2_3715 = torch.constant.int 2 %none_3716 = torch.constant.none %none_3717 = torch.constant.none %cpu_3718 = torch.constant.device "cpu" %false_3719 = torch.constant.bool false %3316 = torch.aten.arange.start_step %int0_3713, %int128_3714, %int2_3715, %none_3716, %none_3717, %cpu_3718, %false_3719 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_3720 = torch.constant.int 0 %int0_3721 = torch.constant.int 0 %int64_3722 = torch.constant.int 64 %int1_3723 = torch.constant.int 1 %3317 = torch.aten.slice.Tensor %3316, %int0_3720, %int0_3721, %int64_3722, %int1_3723 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_3724 = torch.constant.int 6 %3318 = torch.prims.convert_element_type %3317, %int6_3724 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_3725 = torch.constant.int 128 %3319 = torch.aten.div.Scalar %3318, %int128_3725 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_3726 = torch.constant.float 5.000000e+05 %3320 = torch.aten.pow.Scalar %float5.000000e05_3726, %3319 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3321 = torch.aten.reciprocal %3320 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_3727 = torch.constant.float 1.000000e+00 %3322 = torch.aten.mul.Scalar %3321, %float1.000000e00_3727 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_3728 = torch.constant.int 131072 %int1_3729 = torch.constant.int 1 %3323 = torch.prim.ListConstruct %int131072_3728, %int1_3729 : (!torch.int, !torch.int) -> !torch.list<int> %3324 = torch.aten.view %3315, %3323 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3325 = torch.aten.mul.Tensor %3324, %3322 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %3326 = torch.aten.cos %3325 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3327 = torch.aten.sin %3325 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3328 
= torch.aten.complex %3326, %3327 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_3730 = torch.constant.int 1 %3329 = torch.aten.size.int %3294, %int1_3730 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_3731 = torch.constant.int 0 %3330 = torch.aten.add.int %int0_3731, %3329 : !torch.int, !torch.int -> !torch.int %int0_3732 = torch.constant.int 0 %int0_3733 = torch.constant.int 0 %int1_3734 = torch.constant.int 1 %3331 = torch.aten.slice.Tensor %3328, %int0_3732, %int0_3733, %3330, %int1_3734 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3331, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_3735 = torch.constant.int 1 %int0_3736 = torch.constant.int 0 %int9223372036854775807_3737 = torch.constant.int 9223372036854775807 %int1_3738 = torch.constant.int 1 %3332 = torch.aten.slice.Tensor %3331, %int1_3735, %int0_3736, %int9223372036854775807_3737, %int1_3738 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3332, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_3739 = torch.constant.int 0 %3333 = torch.aten.unsqueeze %3332, %int0_3739 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3333, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_3740 = torch.constant.int 2 %3334 = torch.aten.unsqueeze %3333, %int2_3740 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3334, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_3741 = torch.constant.int 3 %int0_3742 = torch.constant.int 0 %int9223372036854775807_3743 = torch.constant.int 9223372036854775807 %int1_3744 = torch.constant.int 1 %3335 = torch.aten.slice.Tensor %3334, %int3_3741, %int0_3742, %int9223372036854775807_3743, %int1_3744 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3335, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3336 = torch_c.to_builtin_tensor %3310 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_3745 = arith.constant 1 : index %dim_3746 = tensor.dim %3336, %c1_3745 : tensor<4x?x32x128xf16> %3337 = flow.tensor.bitcast %3336 : tensor<4x?x32x128xf16>{%dim_3746} -> tensor<4x?x32x64xcomplex<f16>>{%dim_3746} %3338 = torch_c.from_builtin_tensor %3337 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %3338, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %3339 = torch.aten.mul.Tensor %3338, %3335 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %3339, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %3340 = torch_c.to_builtin_tensor %3339 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_3747 = arith.constant 1 : index %dim_3748 = tensor.dim %3340, %c1_3747 : 
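// Q rotated through the same bitcast-to-complex path; a third identical table (%3344 onward)
// is generated next to rotate K.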
tensor<4x?x32x64xcomplex<f32>> %3341 = flow.tensor.bitcast %3340 : tensor<4x?x32x64xcomplex<f32>>{%dim_3748} -> tensor<4x?x32x128xf32>{%dim_3748} %3342 = torch_c.from_builtin_tensor %3341 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %3342, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_3749 = torch.constant.int 5 %3343 = torch.prims.convert_element_type %3342, %int5_3749 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3343, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_3750 = torch.constant.int 131072 %none_3751 = torch.constant.none %none_3752 = torch.constant.none %cpu_3753 = torch.constant.device "cpu" %false_3754 = torch.constant.bool false %3344 = torch.aten.arange %int131072_3750, %none_3751, %none_3752, %cpu_3753, %false_3754 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_3755 = torch.constant.int 0 %int128_3756 = torch.constant.int 128 %int2_3757 = torch.constant.int 2 %none_3758 = torch.constant.none %none_3759 = torch.constant.none %cpu_3760 = torch.constant.device "cpu" %false_3761 = torch.constant.bool false %3345 = torch.aten.arange.start_step %int0_3755, %int128_3756, %int2_3757, %none_3758, %none_3759, %cpu_3760, %false_3761 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_3762 = torch.constant.int 0 %int0_3763 = torch.constant.int 0 %int64_3764 = torch.constant.int 64 %int1_3765 = torch.constant.int 1 %3346 = torch.aten.slice.Tensor %3345, %int0_3762, %int0_3763, %int64_3764, %int1_3765 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_3766 = torch.constant.int 6 %3347 = torch.prims.convert_element_type %3346, %int6_3766 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_3767 = torch.constant.int 128 %3348 = torch.aten.div.Scalar %3347, %int128_3767 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_3768 = torch.constant.float 5.000000e+05 %3349 = torch.aten.pow.Scalar %float5.000000e05_3768, %3348 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3350 = torch.aten.reciprocal %3349 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_3769 = torch.constant.float 1.000000e+00 %3351 = torch.aten.mul.Scalar %3350, %float1.000000e00_3769 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_3770 = torch.constant.int 131072 %int1_3771 = torch.constant.int 1 %3352 = torch.prim.ListConstruct %int131072_3770, %int1_3771 : (!torch.int, !torch.int) -> !torch.list<int> %3353 = torch.aten.view %3344, %3352 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3354 = torch.aten.mul.Tensor %3353, %3351 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %3355 = torch.aten.cos %3354 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3356 = torch.aten.sin %3354 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3357 = torch.aten.complex %3355, %3356 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_3772 = torch.constant.int 1 %3358 = torch.aten.size.int %3301, %int1_3772 : 
!torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_3773 = torch.constant.int 0 %3359 = torch.aten.add.int %int0_3773, %3358 : !torch.int, !torch.int -> !torch.int %int0_3774 = torch.constant.int 0 %int0_3775 = torch.constant.int 0 %int1_3776 = torch.constant.int 1 %3360 = torch.aten.slice.Tensor %3357, %int0_3774, %int0_3775, %3359, %int1_3776 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3360, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_3777 = torch.constant.int 1 %int0_3778 = torch.constant.int 0 %int9223372036854775807_3779 = torch.constant.int 9223372036854775807 %int1_3780 = torch.constant.int 1 %3361 = torch.aten.slice.Tensor %3360, %int1_3777, %int0_3778, %int9223372036854775807_3779, %int1_3780 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3361, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_3781 = torch.constant.int 0 %3362 = torch.aten.unsqueeze %3361, %int0_3781 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3362, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_3782 = torch.constant.int 2 %3363 = torch.aten.unsqueeze %3362, %int2_3782 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3363, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_3783 = torch.constant.int 3 %int0_3784 = torch.constant.int 0 %int9223372036854775807_3785 = torch.constant.int 9223372036854775807 %int1_3786 = torch.constant.int 1 %3364 = torch.aten.slice.Tensor %3363, %int3_3783, %int0_3784, %int9223372036854775807_3785, %int1_3786 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3364, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3365 = torch_c.to_builtin_tensor %3312 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_3787 = arith.constant 1 : index %dim_3788 = tensor.dim %3365, %c1_3787 : tensor<4x?x8x128xf16> %3366 = flow.tensor.bitcast %3365 : tensor<4x?x8x128xf16>{%dim_3788} -> tensor<4x?x8x64xcomplex<f16>>{%dim_3788} %3367 = torch_c.from_builtin_tensor %3366 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %3367, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %3368 = torch.aten.mul.Tensor %3367, %3364 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %3368, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %3369 = torch_c.to_builtin_tensor %3368 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_3789 = arith.constant 1 : index %dim_3790 = tensor.dim %3369, %c1_3789 : tensor<4x?x8x64xcomplex<f32>> %3370 = flow.tensor.bitcast %3369 : tensor<4x?x8x64xcomplex<f32>>{%dim_3790} -> tensor<4x?x8x128xf32>{%dim_3790} %3371 = torch_c.from_builtin_tensor %3370 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape 
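// K rotation lands back in f16 just below (%3372); cache slots for this block use
// position * 64 + 30 (30 = 2 * 15, i.e. likely block 15), K at the even slot and V at +1.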
%3371, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_3791 = torch.constant.int 5 %3372 = torch.prims.convert_element_type %3371, %int5_3791 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3372, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_3792 = torch.constant.int 64 %3373 = torch.aten.mul.Scalar %arg2, %int64_3792 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3373, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int30 = torch.constant.int 30 %int1_3793 = torch.constant.int 1 %3374 = torch.aten.add.Scalar %3373, %int30, %int1_3793 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3374, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_3794 = torch.constant.int 4 %int32_3795 = torch.constant.int 32 %int8_3796 = torch.constant.int 8 %int128_3797 = torch.constant.int 128 %3375 = torch.prim.ListConstruct %int4_3794, %425, %int32_3795, %int8_3796, %int128_3797 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3376 = torch.aten.view %3372, %3375 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3376, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_3798 = torch.constant.int 4 %3377 = torch.aten.mul.int %int4_3798, %425 : !torch.int, !torch.int -> !torch.int %int32_3799 = torch.constant.int 32 %int8_3800 = torch.constant.int 8 %int128_3801 = torch.constant.int 128 %3378 = torch.prim.ListConstruct %3377, %int32_3799, %int8_3800, %int128_3801 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3379 = torch.aten.view %3376, %3378 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3379, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_3802 = torch.constant.int 4 %3380 = torch.aten.mul.int %int4_3802, %425 : !torch.int, !torch.int -> !torch.int %3381 = torch.prim.ListConstruct %3380 : (!torch.int) -> !torch.list<int> %3382 = torch.aten.view %3374, %3381 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3382, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_3803 = torch.constant.int 32 %int2_3804 = torch.constant.int 2 %int32_3805 = torch.constant.int 32 %int8_3806 = torch.constant.int 8 %int128_3807 = torch.constant.int 128 %3383 = torch.prim.ListConstruct %416, %int32_3803, %int2_3804, %int32_3805, %int8_3806, %int128_3807 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3384 = torch.aten.view %3216, %3383 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3384, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_3808 = torch.constant.int 32 %3385 = torch.aten.mul.int %416, %int32_3808 : !torch.int, !torch.int -> !torch.int %int2_3809 = torch.constant.int 2 %3386 = torch.aten.mul.int %3385, %int2_3809 : !torch.int, !torch.int -> !torch.int %int32_3810 = torch.constant.int 32 %int8_3811 = torch.constant.int 8 %int128_3812 = torch.constant.int 128 %3387 = torch.prim.ListConstruct %3386, 
%int32_3810, %int8_3811, %int128_3812 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3388 = torch.aten.view %3384, %3387 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3388, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %3389 = torch.prim.ListConstruct %3382 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_3813 = torch.constant.bool false %3390 = torch.aten.index_put %3388, %3389, %3379, %false_3813 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3390, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_3814 = torch.constant.int 32 %int2_3815 = torch.constant.int 2 %int32_3816 = torch.constant.int 32 %int8_3817 = torch.constant.int 8 %int128_3818 = torch.constant.int 128 %3391 = torch.prim.ListConstruct %416, %int32_3814, %int2_3815, %int32_3816, %int8_3817, %int128_3818 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3392 = torch.aten.view %3390, %3391 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3392, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_3819 = torch.constant.int 2097152 %3393 = torch.prim.ListConstruct %416, %int2097152_3819 : (!torch.int, !torch.int) -> !torch.list<int> %3394 = torch.aten.view %3392, %3393 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3394, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_3820 = torch.constant.int 32 %int2_3821 = torch.constant.int 2 %int32_3822 = torch.constant.int 32 %int8_3823 = torch.constant.int 8 %int128_3824 = torch.constant.int 128 %3395 = torch.prim.ListConstruct %416, %int32_3820, %int2_3821, %int32_3822, %int8_3823, %int128_3824 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3396 = torch.aten.view %3394, %3395 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3396, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_3825 = torch.constant.int 32 %int8_3826 = torch.constant.int 8 %int128_3827 = torch.constant.int 128 %3397 = torch.prim.ListConstruct %3386, %int32_3825, %int8_3826, %int128_3827 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3398 = torch.aten.view %3396, %3397 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3398, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_3828 = torch.constant.int 4 %int32_3829 = torch.constant.int 32 %int8_3830 = torch.constant.int 8 %int128_3831 = torch.constant.int 128 %3399 = torch.prim.ListConstruct %int4_3828, %425, %int32_3829, %int8_3830, %int128_3831 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3400 = torch.aten.view %3314, %3399 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3400, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : 
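// Rotated K is scattered at slot 30 and the cache refolded (%3394); V (%3314) is flattened
// next and written at slot 31, after which the cache returns to [?, 2097152] (%3413).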
!torch.vtensor<[4,?,32,8,128],f16> %int4_3832 = torch.constant.int 4 %3401 = torch.aten.mul.int %int4_3832, %425 : !torch.int, !torch.int -> !torch.int %int32_3833 = torch.constant.int 32 %int8_3834 = torch.constant.int 8 %int128_3835 = torch.constant.int 128 %3402 = torch.prim.ListConstruct %3401, %int32_3833, %int8_3834, %int128_3835 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3403 = torch.aten.view %3400, %3402 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3403, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_3836 = torch.constant.int 1 %int1_3837 = torch.constant.int 1 %3404 = torch.aten.add.Scalar %3374, %int1_3836, %int1_3837 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3404, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_3838 = torch.constant.int 4 %3405 = torch.aten.mul.int %int4_3838, %425 : !torch.int, !torch.int -> !torch.int %3406 = torch.prim.ListConstruct %3405 : (!torch.int) -> !torch.list<int> %3407 = torch.aten.view %3404, %3406 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3407, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %3408 = torch.prim.ListConstruct %3407 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_3839 = torch.constant.bool false %3409 = torch.aten.index_put %3398, %3408, %3403, %false_3839 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3409, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_3840 = torch.constant.int 32 %int2_3841 = torch.constant.int 2 %int32_3842 = torch.constant.int 32 %int8_3843 = torch.constant.int 8 %int128_3844 = torch.constant.int 128 %3410 = torch.prim.ListConstruct %416, %int32_3840, %int2_3841, %int32_3842, %int8_3843, %int128_3844 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3411 = torch.aten.view %3409, %3410 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3411, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_3845 = torch.constant.int 2097152 %3412 = torch.prim.ListConstruct %416, %int2097152_3845 : (!torch.int, !torch.int) -> !torch.list<int> %3413 = torch.aten.view %3411, %3412 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3413, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_3846 = torch.constant.int -2 %3414 = torch.aten.unsqueeze %3372, %int-2_3846 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %3414, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_3847 = torch.constant.int 4 %int8_3848 = torch.constant.int 8 %int4_3849 = torch.constant.int 4 %int128_3850 = torch.constant.int 128 %3415 = torch.prim.ListConstruct %int4_3847, %3358, %int8_3848, %int4_3849, %int128_3850 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_3851 = torch.constant.bool false %3416 = torch.aten.expand %3414, %3415, 
%false_3851 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3416, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_3852 = torch.constant.int 0 %3417 = torch.aten.clone %3416, %int0_3852 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3417, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_3853 = torch.constant.int 4 %int32_3854 = torch.constant.int 32 %int128_3855 = torch.constant.int 128 %3418 = torch.prim.ListConstruct %int4_3853, %3358, %int32_3854, %int128_3855 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3419 = torch.aten._unsafe_view %3417, %3418 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3419, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_3856 = torch.constant.int -2 %3420 = torch.aten.unsqueeze %3314, %int-2_3856 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %3420, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_3857 = torch.constant.int 1 %3421 = torch.aten.size.int %3308, %int1_3857 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_3858 = torch.constant.int 4 %int8_3859 = torch.constant.int 8 %int4_3860 = torch.constant.int 4 %int128_3861 = torch.constant.int 128 %3422 = torch.prim.ListConstruct %int4_3858, %3421, %int8_3859, %int4_3860, %int128_3861 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_3862 = torch.constant.bool false %3423 = torch.aten.expand %3420, %3422, %false_3862 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3423, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_3863 = torch.constant.int 0 %3424 = torch.aten.clone %3423, %int0_3863 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3424, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_3864 = torch.constant.int 4 %int32_3865 = torch.constant.int 32 %int128_3866 = torch.constant.int 128 %3425 = torch.prim.ListConstruct %int4_3864, %3421, %int32_3865, %int128_3866 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3426 = torch.aten._unsafe_view %3424, %3425 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3426, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_3867 = torch.constant.int 1 %int2_3868 = torch.constant.int 2 %3427 = torch.aten.transpose.int %3343, %int1_3867, %int2_3868 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3427, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3869 = torch.constant.int 1 %int2_3870 = torch.constant.int 2 %3428 = torch.aten.transpose.int %3419, %int1_3869, %int2_3870 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3428, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : 
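// Q/K/V transposed to [4, 32, seq, 128] for the second fused SDPA call, again with
// dropout 0.0, is_causal = false, and mask %320.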
!torch.vtensor<[4,32,?,128],f16> %int1_3871 = torch.constant.int 1 %int2_3872 = torch.constant.int 2 %3429 = torch.aten.transpose.int %3426, %int1_3871, %int2_3872 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3429, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_3873 = torch.constant.float 0.000000e+00 %false_3874 = torch.constant.bool false %none_3875 = torch.constant.none %3430:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3427, %3428, %3429, %float0.000000e00_3873, %false_3874, %320, %none_3875) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %3430#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_3876 = torch.constant.int 1 %int2_3877 = torch.constant.int 2 %3431 = torch.aten.transpose.int %3430#0, %int1_3876, %int2_3877 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3431, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_3878 = torch.constant.int 4 %int4096_3879 = torch.constant.int 4096 %3432 = torch.prim.ListConstruct %int4_3878, %3329, %int4096_3879 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3433 = torch.aten.view %3431, %3432 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3433, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3880 = torch.constant.int -2 %int-1_3881 = torch.constant.int -1 %3434 = torch.aten.transpose.int %140, %int-2_3880, %int-1_3881 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_3882 = torch.constant.int 4 %3435 = torch.aten.mul.int %int4_3882, %3329 : !torch.int, !torch.int -> !torch.int %int4096_3883 = torch.constant.int 4096 %3436 = torch.prim.ListConstruct %3435, %int4096_3883 : (!torch.int, !torch.int) -> !torch.list<int> %3437 = torch.aten.view %3433, %3436 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3437, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3438 = torch.aten.mm %3437, %3434 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3438, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3884 = torch.constant.int 4 %int4096_3885 = torch.constant.int 4096 %3439 = torch.prim.ListConstruct %int4_3884, %3329, %int4096_3885 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3440 = torch.aten.view %3438, %3439 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3440, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_3886 = torch.constant.int 1 %3441 = torch.aten.add.Tensor %3278, %3440, %int1_3886 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3441, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_3887 = 
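// [annotation] RMSNorm: upcast to f32, mean of x^2 over the last dim (keepdim), add eps 9.9999997473787516e-06 (1e-5 rounded to f32),
// rsqrt, scale by the f32 norm weight, downcast to f16.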
torch.constant.int 6 %3442 = torch.prims.convert_element_type %3441, %int6_3887 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3442, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_3888 = torch.constant.int 2 %3443 = torch.aten.pow.Tensor_Scalar %3442, %int2_3888 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3443, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_3889 = torch.constant.int -1 %3444 = torch.prim.ListConstruct %int-1_3889 : (!torch.int) -> !torch.list<int> %true_3890 = torch.constant.bool true %none_3891 = torch.constant.none %3445 = torch.aten.mean.dim %3443, %3444, %true_3890, %none_3891 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3445, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_3892 = torch.constant.float 9.9999997473787516E-6 %int1_3893 = torch.constant.int 1 %3446 = torch.aten.add.Scalar %3445, %float9.999990e-06_3892, %int1_3893 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3446, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3447 = torch.aten.rsqrt %3446 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3447, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3448 = torch.aten.mul.Tensor %3442, %3447 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3448, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3449 = torch.aten.mul.Tensor %141, %3448 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3449, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_3894 = torch.constant.int 5 %3450 = torch.prims.convert_element_type %3449, %int5_3894 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3450, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3895 = torch.constant.int -2 %int-1_3896 = torch.constant.int -1 %3451 = torch.aten.transpose.int %142, %int-2_3895, %int-1_3896 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_3897 = torch.constant.int 4 %3452 = torch.aten.mul.int %int4_3897, %294 : !torch.int, !torch.int -> !torch.int %int4096_3898 = torch.constant.int 4096 %3453 = torch.prim.ListConstruct %3452, %int4096_3898 : (!torch.int, !torch.int) -> !torch.list<int> %3454 = torch.aten.view %3450, %3453 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3454, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3455 = torch.aten.mm %3454, %3451 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3455, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_3899 = torch.constant.int 4 %int14336_3900 = torch.constant.int 14336 %3456 = torch.prim.ListConstruct %int4_3899, %294, %int14336_3900 : (!torch.int, !torch.int, !torch.int) -> 
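// [annotation] SwiGLU feed-forward: silu(x @ gate^T) * (x @ up^T) with 4096 -> 14336, followed below by the 14336 -> 4096 down projection.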
!torch.list<int> %3457 = torch.aten.view %3455, %3456 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3457, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3458 = torch.aten.silu %3457 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3458, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_3901 = torch.constant.int -2 %int-1_3902 = torch.constant.int -1 %3459 = torch.aten.transpose.int %143, %int-2_3901, %int-1_3902 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_3903 = torch.constant.int 4 %3460 = torch.aten.mul.int %int4_3903, %294 : !torch.int, !torch.int -> !torch.int %int4096_3904 = torch.constant.int 4096 %3461 = torch.prim.ListConstruct %3460, %int4096_3904 : (!torch.int, !torch.int) -> !torch.list<int> %3462 = torch.aten.view %3450, %3461 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3462, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3463 = torch.aten.mm %3462, %3459 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3463, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_3905 = torch.constant.int 4 %int14336_3906 = torch.constant.int 14336 %3464 = torch.prim.ListConstruct %int4_3905, %294, %int14336_3906 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3465 = torch.aten.view %3463, %3464 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3465, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3466 = torch.aten.mul.Tensor %3458, %3465 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3466, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_3907 = torch.constant.int -2 %int-1_3908 = torch.constant.int -1 %3467 = torch.aten.transpose.int %144, %int-2_3907, %int-1_3908 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_3909 = torch.constant.int 1 %3468 = torch.aten.size.int %3457, %int1_3909 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_3910 = torch.constant.int 4 %3469 = torch.aten.mul.int %int4_3910, %3468 : !torch.int, !torch.int -> !torch.int %int14336_3911 = torch.constant.int 14336 %3470 = torch.prim.ListConstruct %3469, %int14336_3911 : (!torch.int, !torch.int) -> !torch.list<int> %3471 = torch.aten.view %3466, %3470 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3471, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %3472 = torch.aten.mm %3471, %3467 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3472, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3912 = torch.constant.int 4 %int4096_3913 = torch.constant.int 4096 %3473 = torch.prim.ListConstruct %int4_3912, %3468, %int4096_3913 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3474 = torch.aten.view %3472, %3473 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> 
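// [annotation] Residual add, the next block's input RMSNorm (same mean-of-squares/rsqrt pattern), then the attention projections:
// Q is 4096 -> 4096; K and V are 4096 -> 1024, i.e. 8 KV heads of dim 128.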
!torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3474, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_3914 = torch.constant.int 1 %3475 = torch.aten.add.Tensor %3441, %3474, %int1_3914 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3475, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_3915 = torch.constant.int 6 %3476 = torch.prims.convert_element_type %3475, %int6_3915 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3476, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_3916 = torch.constant.int 2 %3477 = torch.aten.pow.Tensor_Scalar %3476, %int2_3916 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3477, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_3917 = torch.constant.int -1 %3478 = torch.prim.ListConstruct %int-1_3917 : (!torch.int) -> !torch.list<int> %true_3918 = torch.constant.bool true %none_3919 = torch.constant.none %3479 = torch.aten.mean.dim %3477, %3478, %true_3918, %none_3919 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3479, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_3920 = torch.constant.float 9.9999997473787516E-6 %int1_3921 = torch.constant.int 1 %3480 = torch.aten.add.Scalar %3479, %float9.999990e-06_3920, %int1_3921 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3480, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3481 = torch.aten.rsqrt %3480 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3481, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3482 = torch.aten.mul.Tensor %3476, %3481 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3482, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3483 = torch.aten.mul.Tensor %145, %3482 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3483, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_3922 = torch.constant.int 5 %3484 = torch.prims.convert_element_type %3483, %int5_3922 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3484, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3923 = torch.constant.int -2 %int-1_3924 = torch.constant.int -1 %3485 = torch.aten.transpose.int %146, %int-2_3923, %int-1_3924 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_3925 = torch.constant.int 4 %3486 = torch.aten.mul.int %int4_3925, %294 : !torch.int, !torch.int -> !torch.int %int4096_3926 = torch.constant.int 4096 %3487 = torch.prim.ListConstruct %3486, %int4096_3926 : (!torch.int, !torch.int) -> !torch.list<int> %3488 = torch.aten.view %3484, %3487 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3488, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : 
!torch.vtensor<[?,4096],f16> %3489 = torch.aten.mm %3488, %3485 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3489, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_3927 = torch.constant.int 4 %int4096_3928 = torch.constant.int 4096 %3490 = torch.prim.ListConstruct %int4_3927, %294, %int4096_3928 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3491 = torch.aten.view %3489, %3490 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3491, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_3929 = torch.constant.int -2 %int-1_3930 = torch.constant.int -1 %3492 = torch.aten.transpose.int %147, %int-2_3929, %int-1_3930 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_3931 = torch.constant.int 4 %3493 = torch.aten.mul.int %int4_3931, %294 : !torch.int, !torch.int -> !torch.int %int4096_3932 = torch.constant.int 4096 %3494 = torch.prim.ListConstruct %3493, %int4096_3932 : (!torch.int, !torch.int) -> !torch.list<int> %3495 = torch.aten.view %3484, %3494 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3495, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3496 = torch.aten.mm %3495, %3492 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3496, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_3933 = torch.constant.int 4 %int1024_3934 = torch.constant.int 1024 %3497 = torch.prim.ListConstruct %int4_3933, %294, %int1024_3934 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3498 = torch.aten.view %3496, %3497 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3498, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_3935 = torch.constant.int -2 %int-1_3936 = torch.constant.int -1 %3499 = torch.aten.transpose.int %148, %int-2_3935, %int-1_3936 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_3937 = torch.constant.int 4 %3500 = torch.aten.mul.int %int4_3937, %294 : !torch.int, !torch.int -> !torch.int %int4096_3938 = torch.constant.int 4096 %3501 = torch.prim.ListConstruct %3500, %int4096_3938 : (!torch.int, !torch.int) -> !torch.list<int> %3502 = torch.aten.view %3484, %3501 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3502, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3503 = torch.aten.mm %3502, %3499 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3503, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_3939 = torch.constant.int 4 %int1024_3940 = torch.constant.int 1024 %3504 = torch.prim.ListConstruct %int4_3939, %294, %int1024_3940 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3505 = torch.aten.view %3503, %3504 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3505, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_3941 = torch.constant.int 4 %int32_3942 = 
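// [annotation] Q/K/V are reshaped to heads ([4,?,32,128] and [4,?,8,128]) and the RoPE table is rebuilt: positions arange(131072)
// and inverse frequencies 500000^(-j/64) for j = 0..63 (rope theta 500000, head dim 128, max context 131072).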
torch.constant.int 32 %int128_3943 = torch.constant.int 128 %3506 = torch.prim.ListConstruct %int4_3941, %294, %int32_3942, %int128_3943 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3507 = torch.aten.view %3491, %3506 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3507, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_3944 = torch.constant.int 4 %int8_3945 = torch.constant.int 8 %int128_3946 = torch.constant.int 128 %3508 = torch.prim.ListConstruct %int4_3944, %294, %int8_3945, %int128_3946 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3509 = torch.aten.view %3498, %3508 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3509, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_3947 = torch.constant.int 4 %int8_3948 = torch.constant.int 8 %int128_3949 = torch.constant.int 128 %3510 = torch.prim.ListConstruct %int4_3947, %294, %int8_3948, %int128_3949 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3511 = torch.aten.view %3505, %3510 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3511, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_3950 = torch.constant.int 131072 %none_3951 = torch.constant.none %none_3952 = torch.constant.none %cpu_3953 = torch.constant.device "cpu" %false_3954 = torch.constant.bool false %3512 = torch.aten.arange %int131072_3950, %none_3951, %none_3952, %cpu_3953, %false_3954 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_3955 = torch.constant.int 0 %int128_3956 = torch.constant.int 128 %int2_3957 = torch.constant.int 2 %none_3958 = torch.constant.none %none_3959 = torch.constant.none %cpu_3960 = torch.constant.device "cpu" %false_3961 = torch.constant.bool false %3513 = torch.aten.arange.start_step %int0_3955, %int128_3956, %int2_3957, %none_3958, %none_3959, %cpu_3960, %false_3961 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_3962 = torch.constant.int 0 %int0_3963 = torch.constant.int 0 %int64_3964 = torch.constant.int 64 %int1_3965 = torch.constant.int 1 %3514 = torch.aten.slice.Tensor %3513, %int0_3962, %int0_3963, %int64_3964, %int1_3965 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_3966 = torch.constant.int 6 %3515 = torch.prims.convert_element_type %3514, %int6_3966 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_3967 = torch.constant.int 128 %3516 = torch.aten.div.Scalar %3515, %int128_3967 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_3968 = torch.constant.float 5.000000e+05 %3517 = torch.aten.pow.Scalar %float5.000000e05_3968, %3516 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3518 = torch.aten.reciprocal %3517 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_3969 = torch.constant.float 1.000000e+00 %3519 = torch.aten.mul.Scalar %3518, %float1.000000e00_3969 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_3970 = torch.constant.int 131072 %int1_3971 = torch.constant.int 1 %3520 = torch.prim.ListConstruct %int131072_3970, 
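// [annotation] The outer product of positions and inverse frequencies becomes a [131072,64] rotation table complex(cos, sin),
// sliced to the live sequence length and broadcast-shaped to [1,?,1,64].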
%int1_3971 : (!torch.int, !torch.int) -> !torch.list<int> %3521 = torch.aten.view %3512, %3520 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3522 = torch.aten.mul.Tensor %3521, %3519 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %3523 = torch.aten.cos %3522 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3524 = torch.aten.sin %3522 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3525 = torch.aten.complex %3523, %3524 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_3972 = torch.constant.int 1 %3526 = torch.aten.size.int %3491, %int1_3972 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_3973 = torch.constant.int 0 %3527 = torch.aten.add.int %int0_3973, %3526 : !torch.int, !torch.int -> !torch.int %int0_3974 = torch.constant.int 0 %int0_3975 = torch.constant.int 0 %int1_3976 = torch.constant.int 1 %3528 = torch.aten.slice.Tensor %3525, %int0_3974, %int0_3975, %3527, %int1_3976 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3528, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_3977 = torch.constant.int 1 %int0_3978 = torch.constant.int 0 %int9223372036854775807_3979 = torch.constant.int 9223372036854775807 %int1_3980 = torch.constant.int 1 %3529 = torch.aten.slice.Tensor %3528, %int1_3977, %int0_3978, %int9223372036854775807_3979, %int1_3980 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3529, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_3981 = torch.constant.int 0 %3530 = torch.aten.unsqueeze %3529, %int0_3981 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3530, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_3982 = torch.constant.int 2 %3531 = torch.aten.unsqueeze %3530, %int2_3982 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3531, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_3983 = torch.constant.int 3 %int0_3984 = torch.constant.int 0 %int9223372036854775807_3985 = torch.constant.int 9223372036854775807 %int1_3986 = torch.constant.int 1 %3532 = torch.aten.slice.Tensor %3531, %int3_3983, %int0_3984, %int9223372036854775807_3985, %int1_3986 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3532, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3533 = torch_c.to_builtin_tensor %3507 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_3987 = arith.constant 1 : index %dim_3988 = tensor.dim %3533, %c1_3987 : tensor<4x?x32x128xf16> %3534 = flow.tensor.bitcast %3533 : tensor<4x?x32x128xf16>{%dim_3988} -> tensor<4x?x32x64xcomplex<f16>>{%dim_3988} %3535 = torch_c.from_builtin_tensor %3534 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %3535, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> 
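// [annotation] RoPE applied to Q: bitcast [4,?,32,128] f16 to [4,?,32,64] complex<f16>, complex-multiply by the table (promoting
// to complex<f32>), bitcast back to [4,?,32,128] f32, truncate to f16. The identical rotation table is then rebuilt for K.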
%3536 = torch.aten.mul.Tensor %3535, %3532 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %3536, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %3537 = torch_c.to_builtin_tensor %3536 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_3989 = arith.constant 1 : index %dim_3990 = tensor.dim %3537, %c1_3989 : tensor<4x?x32x64xcomplex<f32>> %3538 = flow.tensor.bitcast %3537 : tensor<4x?x32x64xcomplex<f32>>{%dim_3990} -> tensor<4x?x32x128xf32>{%dim_3990} %3539 = torch_c.from_builtin_tensor %3538 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %3539, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_3991 = torch.constant.int 5 %3540 = torch.prims.convert_element_type %3539, %int5_3991 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3540, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_3992 = torch.constant.int 131072 %none_3993 = torch.constant.none %none_3994 = torch.constant.none %cpu_3995 = torch.constant.device "cpu" %false_3996 = torch.constant.bool false %3541 = torch.aten.arange %int131072_3992, %none_3993, %none_3994, %cpu_3995, %false_3996 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_3997 = torch.constant.int 0 %int128_3998 = torch.constant.int 128 %int2_3999 = torch.constant.int 2 %none_4000 = torch.constant.none %none_4001 = torch.constant.none %cpu_4002 = torch.constant.device "cpu" %false_4003 = torch.constant.bool false %3542 = torch.aten.arange.start_step %int0_3997, %int128_3998, %int2_3999, %none_4000, %none_4001, %cpu_4002, %false_4003 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_4004 = torch.constant.int 0 %int0_4005 = torch.constant.int 0 %int64_4006 = torch.constant.int 64 %int1_4007 = torch.constant.int 1 %3543 = torch.aten.slice.Tensor %3542, %int0_4004, %int0_4005, %int64_4006, %int1_4007 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_4008 = torch.constant.int 6 %3544 = torch.prims.convert_element_type %3543, %int6_4008 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_4009 = torch.constant.int 128 %3545 = torch.aten.div.Scalar %3544, %int128_4009 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_4010 = torch.constant.float 5.000000e+05 %3546 = torch.aten.pow.Scalar %float5.000000e05_4010, %3545 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3547 = torch.aten.reciprocal %3546 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_4011 = torch.constant.float 1.000000e+00 %3548 = torch.aten.mul.Scalar %3547, %float1.000000e00_4011 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_4012 = torch.constant.int 131072 %int1_4013 = torch.constant.int 1 %3549 = torch.prim.ListConstruct %int131072_4012, %int1_4013 : (!torch.int, !torch.int) -> !torch.list<int> %3550 = torch.aten.view %3541, %3549 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3551 = torch.aten.mul.Tensor %3550, %3548 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> 
-> !torch.vtensor<[131072,64],f32> %3552 = torch.aten.cos %3551 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3553 = torch.aten.sin %3551 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3554 = torch.aten.complex %3552, %3553 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_4014 = torch.constant.int 1 %3555 = torch.aten.size.int %3498, %int1_4014 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_4015 = torch.constant.int 0 %3556 = torch.aten.add.int %int0_4015, %3555 : !torch.int, !torch.int -> !torch.int %int0_4016 = torch.constant.int 0 %int0_4017 = torch.constant.int 0 %int1_4018 = torch.constant.int 1 %3557 = torch.aten.slice.Tensor %3554, %int0_4016, %int0_4017, %3556, %int1_4018 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3557, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_4019 = torch.constant.int 1 %int0_4020 = torch.constant.int 0 %int9223372036854775807_4021 = torch.constant.int 9223372036854775807 %int1_4022 = torch.constant.int 1 %3558 = torch.aten.slice.Tensor %3557, %int1_4019, %int0_4020, %int9223372036854775807_4021, %int1_4022 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3558, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_4023 = torch.constant.int 0 %3559 = torch.aten.unsqueeze %3558, %int0_4023 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3559, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_4024 = torch.constant.int 2 %3560 = torch.aten.unsqueeze %3559, %int2_4024 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3560, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_4025 = torch.constant.int 3 %int0_4026 = torch.constant.int 0 %int9223372036854775807_4027 = torch.constant.int 9223372036854775807 %int1_4028 = torch.constant.int 1 %3561 = torch.aten.slice.Tensor %3560, %int3_4025, %int0_4026, %int9223372036854775807_4027, %int1_4028 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3561, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3562 = torch_c.to_builtin_tensor %3509 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_4029 = arith.constant 1 : index %dim_4030 = tensor.dim %3562, %c1_4029 : tensor<4x?x8x128xf16> %3563 = flow.tensor.bitcast %3562 : tensor<4x?x8x128xf16>{%dim_4030} -> tensor<4x?x8x64xcomplex<f16>>{%dim_4030} %3564 = torch_c.from_builtin_tensor %3563 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %3564, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %3565 = torch.aten.mul.Tensor %3564, %3561 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %3565, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : 
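// [annotation] RoPE applied to K the same way, then the paged-cache slot indices: page_id * 64 + 32 for K (+1 for V below).
// Under the [pages, 32, 2, 32, 8, 128] cache view this reads as the K/V slots of block 16, but the block index is inferred, not stated in the dump.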
!torch.vtensor<[4,?,8,64],complex<f32>> %3566 = torch_c.to_builtin_tensor %3565 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_4031 = arith.constant 1 : index %dim_4032 = tensor.dim %3566, %c1_4031 : tensor<4x?x8x64xcomplex<f32>> %3567 = flow.tensor.bitcast %3566 : tensor<4x?x8x64xcomplex<f32>>{%dim_4032} -> tensor<4x?x8x128xf32>{%dim_4032} %3568 = torch_c.from_builtin_tensor %3567 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %3568, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_4033 = torch.constant.int 5 %3569 = torch.prims.convert_element_type %3568, %int5_4033 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3569, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_4034 = torch.constant.int 64 %3570 = torch.aten.mul.Scalar %arg2, %int64_4034 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3570, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int32_4035 = torch.constant.int 32 %int1_4036 = torch.constant.int 1 %3571 = torch.aten.add.Scalar %3570, %int32_4035, %int1_4036 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3571, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_4037 = torch.constant.int 4 %int32_4038 = torch.constant.int 32 %int8_4039 = torch.constant.int 8 %int128_4040 = torch.constant.int 128 %3572 = torch.prim.ListConstruct %int4_4037, %425, %int32_4038, %int8_4039, %int128_4040 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3573 = torch.aten.view %3569, %3572 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3573, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_4041 = torch.constant.int 4 %3574 = torch.aten.mul.int %int4_4041, %425 : !torch.int, !torch.int -> !torch.int %int32_4042 = torch.constant.int 32 %int8_4043 = torch.constant.int 8 %int128_4044 = torch.constant.int 128 %3575 = torch.prim.ListConstruct %3574, %int32_4042, %int8_4043, %int128_4044 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3576 = torch.aten.view %3573, %3575 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3576, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_4045 = torch.constant.int 4 %3577 = torch.aten.mul.int %int4_4045, %425 : !torch.int, !torch.int -> !torch.int %3578 = torch.prim.ListConstruct %3577 : (!torch.int) -> !torch.list<int> %3579 = torch.aten.view %3571, %3578 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3579, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_4046 = torch.constant.int 32 %int2_4047 = torch.constant.int 2 %int32_4048 = torch.constant.int 32 %int8_4049 = torch.constant.int 8 %int128_4050 = torch.constant.int 128 %3580 = torch.prim.ListConstruct %416, %int32_4046, %int2_4047, %int32_4048, %int8_4049, %int128_4050 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3581 = torch.aten.view %3413, %3580 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> 
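// [annotation] K cache write: the flat [?,2097152] cache is viewed as [pages*64, 32, 8, 128] rows, index_put scatters the rotated K
// into its slots, and the result is viewed back to the flat form.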
torch.bind_symbolic_shape %3581, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_4051 = torch.constant.int 32 %3582 = torch.aten.mul.int %416, %int32_4051 : !torch.int, !torch.int -> !torch.int %int2_4052 = torch.constant.int 2 %3583 = torch.aten.mul.int %3582, %int2_4052 : !torch.int, !torch.int -> !torch.int %int32_4053 = torch.constant.int 32 %int8_4054 = torch.constant.int 8 %int128_4055 = torch.constant.int 128 %3584 = torch.prim.ListConstruct %3583, %int32_4053, %int8_4054, %int128_4055 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3585 = torch.aten.view %3581, %3584 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3585, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %3586 = torch.prim.ListConstruct %3579 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_4056 = torch.constant.bool false %3587 = torch.aten.index_put %3585, %3586, %3576, %false_4056 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3587, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_4057 = torch.constant.int 32 %int2_4058 = torch.constant.int 2 %int32_4059 = torch.constant.int 32 %int8_4060 = torch.constant.int 8 %int128_4061 = torch.constant.int 128 %3588 = torch.prim.ListConstruct %416, %int32_4057, %int2_4058, %int32_4059, %int8_4060, %int128_4061 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3589 = torch.aten.view %3587, %3588 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3589, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_4062 = torch.constant.int 2097152 %3590 = torch.prim.ListConstruct %416, %int2097152_4062 : (!torch.int, !torch.int) -> !torch.list<int> %3591 = torch.aten.view %3589, %3590 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3591, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_4063 = torch.constant.int 32 %int2_4064 = torch.constant.int 2 %int32_4065 = torch.constant.int 32 %int8_4066 = torch.constant.int 8 %int128_4067 = torch.constant.int 128 %3592 = torch.prim.ListConstruct %416, %int32_4063, %int2_4064, %int32_4065, %int8_4066, %int128_4067 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3593 = torch.aten.view %3591, %3592 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3593, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_4068 = torch.constant.int 32 %int8_4069 = torch.constant.int 8 %int128_4070 = torch.constant.int 128 %3594 = torch.prim.ListConstruct %3583, %int32_4068, %int8_4069, %int128_4070 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3595 = torch.aten.view %3593, %3594 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3595, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_4071 = torch.constant.int 4 
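// [annotation] V cache write: V is reshaped into the same paged layout and scattered at slot index + 1.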
%int32_4072 = torch.constant.int 32 %int8_4073 = torch.constant.int 8 %int128_4074 = torch.constant.int 128 %3596 = torch.prim.ListConstruct %int4_4071, %425, %int32_4072, %int8_4073, %int128_4074 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3597 = torch.aten.view %3511, %3596 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3597, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_4075 = torch.constant.int 4 %3598 = torch.aten.mul.int %int4_4075, %425 : !torch.int, !torch.int -> !torch.int %int32_4076 = torch.constant.int 32 %int8_4077 = torch.constant.int 8 %int128_4078 = torch.constant.int 128 %3599 = torch.prim.ListConstruct %3598, %int32_4076, %int8_4077, %int128_4078 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3600 = torch.aten.view %3597, %3599 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3600, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_4079 = torch.constant.int 1 %int1_4080 = torch.constant.int 1 %3601 = torch.aten.add.Scalar %3571, %int1_4079, %int1_4080 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3601, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_4081 = torch.constant.int 4 %3602 = torch.aten.mul.int %int4_4081, %425 : !torch.int, !torch.int -> !torch.int %3603 = torch.prim.ListConstruct %3602 : (!torch.int) -> !torch.list<int> %3604 = torch.aten.view %3601, %3603 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3604, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %3605 = torch.prim.ListConstruct %3604 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_4082 = torch.constant.bool false %3606 = torch.aten.index_put %3595, %3605, %3600, %false_4082 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3606, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_4083 = torch.constant.int 32 %int2_4084 = torch.constant.int 2 %int32_4085 = torch.constant.int 32 %int8_4086 = torch.constant.int 8 %int128_4087 = torch.constant.int 128 %3607 = torch.prim.ListConstruct %416, %int32_4083, %int2_4084, %int32_4085, %int8_4086, %int128_4087 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3608 = torch.aten.view %3606, %3607 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3608, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_4088 = torch.constant.int 2097152 %3609 = torch.prim.ListConstruct %416, %int2097152_4088 : (!torch.int, !torch.int) -> !torch.list<int> %3610 = torch.aten.view %3608, %3609 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3610, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_4089 = torch.constant.int -2 %3611 = torch.aten.unsqueeze %3569, %int-2_4089 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape 
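// [annotation] Grouped-query expansion for this block: K and V go [4,?,8,128] -> [4,?,8,1,128] -> [4,?,8,4,128] -> [4,?,32,128]
// to match the 32 query heads.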
%3611, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_4090 = torch.constant.int 4 %int8_4091 = torch.constant.int 8 %int4_4092 = torch.constant.int 4 %int128_4093 = torch.constant.int 128 %3612 = torch.prim.ListConstruct %int4_4090, %3555, %int8_4091, %int4_4092, %int128_4093 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_4094 = torch.constant.bool false %3613 = torch.aten.expand %3611, %3612, %false_4094 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3613, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_4095 = torch.constant.int 0 %3614 = torch.aten.clone %3613, %int0_4095 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3614, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_4096 = torch.constant.int 4 %int32_4097 = torch.constant.int 32 %int128_4098 = torch.constant.int 128 %3615 = torch.prim.ListConstruct %int4_4096, %3555, %int32_4097, %int128_4098 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3616 = torch.aten._unsafe_view %3614, %3615 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3616, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_4099 = torch.constant.int -2 %3617 = torch.aten.unsqueeze %3511, %int-2_4099 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %3617, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_4100 = torch.constant.int 1 %3618 = torch.aten.size.int %3505, %int1_4100 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_4101 = torch.constant.int 4 %int8_4102 = torch.constant.int 8 %int4_4103 = torch.constant.int 4 %int128_4104 = torch.constant.int 128 %3619 = torch.prim.ListConstruct %int4_4101, %3618, %int8_4102, %int4_4103, %int128_4104 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_4105 = torch.constant.bool false %3620 = torch.aten.expand %3617, %3619, %false_4105 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3620, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_4106 = torch.constant.int 0 %3621 = torch.aten.clone %3620, %int0_4106 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3621, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_4107 = torch.constant.int 4 %int32_4108 = torch.constant.int 32 %int128_4109 = torch.constant.int 128 %3622 = torch.prim.ListConstruct %int4_4107, %3618, %int32_4108, %int128_4109 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3623 = torch.aten._unsafe_view %3621, %3622 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3623, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_4110 = torch.constant.int 1 %int2_4111 = torch.constant.int 2 %3624 = torch.aten.transpose.int %3540, %int1_4110, %int2_4111 : !torch.vtensor<[4,?,32,128],f16>, 
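// [annotation] Q/K/V transposed to [4,32,?,128], scaled dot-product attention with mask %320, transpose back, heads flattened
// to [4,?,4096], then the output projection.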
!torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3624, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4112 = torch.constant.int 1 %int2_4113 = torch.constant.int 2 %3625 = torch.aten.transpose.int %3616, %int1_4112, %int2_4113 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3625, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4114 = torch.constant.int 1 %int2_4115 = torch.constant.int 2 %3626 = torch.aten.transpose.int %3623, %int1_4114, %int2_4115 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3626, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_4116 = torch.constant.float 0.000000e+00 %false_4117 = torch.constant.bool false %none_4118 = torch.constant.none %3627:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3624, %3625, %3626, %float0.000000e00_4116, %false_4117, %320, %none_4118) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %3627#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4119 = torch.constant.int 1 %int2_4120 = torch.constant.int 2 %3628 = torch.aten.transpose.int %3627#0, %int1_4119, %int2_4120 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3628, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_4121 = torch.constant.int 4 %int4096_4122 = torch.constant.int 4096 %3629 = torch.prim.ListConstruct %int4_4121, %3526, %int4096_4122 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3630 = torch.aten.view %3628, %3629 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3630, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4123 = torch.constant.int -2 %int-1_4124 = torch.constant.int -1 %3631 = torch.aten.transpose.int %149, %int-2_4123, %int-1_4124 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_4125 = torch.constant.int 4 %3632 = torch.aten.mul.int %int4_4125, %3526 : !torch.int, !torch.int -> !torch.int %int4096_4126 = torch.constant.int 4096 %3633 = torch.prim.ListConstruct %3632, %int4096_4126 : (!torch.int, !torch.int) -> !torch.list<int> %3634 = torch.aten.view %3630, %3633 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3634, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3635 = torch.aten.mm %3634, %3631 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3635, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4127 = torch.constant.int 4 %int4096_4128 = torch.constant.int 4096 %3636 = torch.prim.ListConstruct %int4_4127, %3526, %int4096_4128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3637 = torch.aten.view %3635, %3636 : !torch.vtensor<[?,4096],f16>, 
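// [annotation] Residual add, ffn_norm-style RMSNorm (f32, eps ~1e-5), then this block's SwiGLU FFN: silu(gate) * up, 4096 -> 14336.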
!torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3637, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_4129 = torch.constant.int 1 %3638 = torch.aten.add.Tensor %3475, %3637, %int1_4129 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3638, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_4130 = torch.constant.int 6 %3639 = torch.prims.convert_element_type %3638, %int6_4130 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3639, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_4131 = torch.constant.int 2 %3640 = torch.aten.pow.Tensor_Scalar %3639, %int2_4131 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3640, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_4132 = torch.constant.int -1 %3641 = torch.prim.ListConstruct %int-1_4132 : (!torch.int) -> !torch.list<int> %true_4133 = torch.constant.bool true %none_4134 = torch.constant.none %3642 = torch.aten.mean.dim %3640, %3641, %true_4133, %none_4134 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3642, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_4135 = torch.constant.float 9.9999997473787516E-6 %int1_4136 = torch.constant.int 1 %3643 = torch.aten.add.Scalar %3642, %float9.999990e-06_4135, %int1_4136 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3643, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3644 = torch.aten.rsqrt %3643 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3644, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3645 = torch.aten.mul.Tensor %3639, %3644 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3645, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3646 = torch.aten.mul.Tensor %150, %3645 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3646, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_4137 = torch.constant.int 5 %3647 = torch.prims.convert_element_type %3646, %int5_4137 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3647, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4138 = torch.constant.int -2 %int-1_4139 = torch.constant.int -1 %3648 = torch.aten.transpose.int %151, %int-2_4138, %int-1_4139 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_4140 = torch.constant.int 4 %3649 = torch.aten.mul.int %int4_4140, %294 : !torch.int, !torch.int -> !torch.int %int4096_4141 = torch.constant.int 4096 %3650 = torch.prim.ListConstruct %3649, %int4096_4141 : (!torch.int, !torch.int) -> !torch.list<int> %3651 = torch.aten.view %3647, %3650 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3651, [%292], affine_map<()[s0] 
-> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3652 = torch.aten.mm %3651, %3648 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3652, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_4142 = torch.constant.int 4 %int14336_4143 = torch.constant.int 14336 %3653 = torch.prim.ListConstruct %int4_4142, %294, %int14336_4143 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3654 = torch.aten.view %3652, %3653 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3654, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3655 = torch.aten.silu %3654 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3655, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_4144 = torch.constant.int -2 %int-1_4145 = torch.constant.int -1 %3656 = torch.aten.transpose.int %152, %int-2_4144, %int-1_4145 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_4146 = torch.constant.int 4 %3657 = torch.aten.mul.int %int4_4146, %294 : !torch.int, !torch.int -> !torch.int %int4096_4147 = torch.constant.int 4096 %3658 = torch.prim.ListConstruct %3657, %int4096_4147 : (!torch.int, !torch.int) -> !torch.list<int> %3659 = torch.aten.view %3647, %3658 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3659, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3660 = torch.aten.mm %3659, %3656 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3660, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_4148 = torch.constant.int 4 %int14336_4149 = torch.constant.int 14336 %3661 = torch.prim.ListConstruct %int4_4148, %294, %int14336_4149 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3662 = torch.aten.view %3660, %3661 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3662, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3663 = torch.aten.mul.Tensor %3655, %3662 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3663, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_4150 = torch.constant.int -2 %int-1_4151 = torch.constant.int -1 %3664 = torch.aten.transpose.int %153, %int-2_4150, %int-1_4151 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_4152 = torch.constant.int 1 %3665 = torch.aten.size.int %3654, %int1_4152 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_4153 = torch.constant.int 4 %3666 = torch.aten.mul.int %int4_4153, %3665 : !torch.int, !torch.int -> !torch.int %int14336_4154 = torch.constant.int 14336 %3667 = torch.prim.ListConstruct %3666, %int14336_4154 : (!torch.int, !torch.int) -> !torch.list<int> %3668 = torch.aten.view %3663, %3667 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3668, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %3669 = torch.aten.mm %3668, %3664 : 
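// [annotation] Down projection (14336 -> 4096) and residual add close out this block; the next block starts immediately with its
// input RMSNorm and Q/K/V projections.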
!torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3669, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4155 = torch.constant.int 4 %int4096_4156 = torch.constant.int 4096 %3670 = torch.prim.ListConstruct %int4_4155, %3665, %int4096_4156 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3671 = torch.aten.view %3669, %3670 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3671, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_4157 = torch.constant.int 1 %3672 = torch.aten.add.Tensor %3638, %3671, %int1_4157 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3672, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_4158 = torch.constant.int 6 %3673 = torch.prims.convert_element_type %3672, %int6_4158 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3673, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_4159 = torch.constant.int 2 %3674 = torch.aten.pow.Tensor_Scalar %3673, %int2_4159 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3674, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_4160 = torch.constant.int -1 %3675 = torch.prim.ListConstruct %int-1_4160 : (!torch.int) -> !torch.list<int> %true_4161 = torch.constant.bool true %none_4162 = torch.constant.none %3676 = torch.aten.mean.dim %3674, %3675, %true_4161, %none_4162 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3676, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_4163 = torch.constant.float 9.9999997473787516E-6 %int1_4164 = torch.constant.int 1 %3677 = torch.aten.add.Scalar %3676, %float9.999990e-06_4163, %int1_4164 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3677, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3678 = torch.aten.rsqrt %3677 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3678, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3679 = torch.aten.mul.Tensor %3673, %3678 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3679, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3680 = torch.aten.mul.Tensor %154, %3679 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3680, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_4165 = torch.constant.int 5 %3681 = torch.prims.convert_element_type %3680, %int5_4165 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3681, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4166 = torch.constant.int -2 %int-1_4167 = torch.constant.int -1 %3682 = torch.aten.transpose.int %155, %int-2_4166, %int-1_4167 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int 
-> !torch.vtensor<[4096,4096],f16> %int4_4168 = torch.constant.int 4 %3683 = torch.aten.mul.int %int4_4168, %294 : !torch.int, !torch.int -> !torch.int %int4096_4169 = torch.constant.int 4096 %3684 = torch.prim.ListConstruct %3683, %int4096_4169 : (!torch.int, !torch.int) -> !torch.list<int> %3685 = torch.aten.view %3681, %3684 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3685, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3686 = torch.aten.mm %3685, %3682 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3686, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4170 = torch.constant.int 4 %int4096_4171 = torch.constant.int 4096 %3687 = torch.prim.ListConstruct %int4_4170, %294, %int4096_4171 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3688 = torch.aten.view %3686, %3687 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3688, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4172 = torch.constant.int -2 %int-1_4173 = torch.constant.int -1 %3689 = torch.aten.transpose.int %156, %int-2_4172, %int-1_4173 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_4174 = torch.constant.int 4 %3690 = torch.aten.mul.int %int4_4174, %294 : !torch.int, !torch.int -> !torch.int %int4096_4175 = torch.constant.int 4096 %3691 = torch.prim.ListConstruct %3690, %int4096_4175 : (!torch.int, !torch.int) -> !torch.list<int> %3692 = torch.aten.view %3681, %3691 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3692, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3693 = torch.aten.mm %3692, %3689 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3693, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_4176 = torch.constant.int 4 %int1024_4177 = torch.constant.int 1024 %3694 = torch.prim.ListConstruct %int4_4176, %294, %int1024_4177 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3695 = torch.aten.view %3693, %3694 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3695, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_4178 = torch.constant.int -2 %int-1_4179 = torch.constant.int -1 %3696 = torch.aten.transpose.int %157, %int-2_4178, %int-1_4179 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_4180 = torch.constant.int 4 %3697 = torch.aten.mul.int %int4_4180, %294 : !torch.int, !torch.int -> !torch.int %int4096_4181 = torch.constant.int 4096 %3698 = torch.prim.ListConstruct %3697, %int4096_4181 : (!torch.int, !torch.int) -> !torch.list<int> %3699 = torch.aten.view %3681, %3698 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3699, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3700 = torch.aten.mm %3699, %3696 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3700, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> 
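// NOTE (annotation, not part of the generated IR): the ops above close one decoder
// block (the FFN output is added back into the residual stream at %3672) and start
// the next one: an f32 RMSNorm (square, mean over the last dim, add eps ~1e-5,
// rsqrt, rescale by a 4096-wide norm weight) produces %3681, which feeds the Q
// projection (%3686, 4096 -> 4096) and the K/V projections (%3693 and %3700,
// 4096 -> 1024, i.e. 8 KV heads of dim 128 against 32 query heads, which is
// grouped-query attention).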
%int4_4182 = torch.constant.int 4 %int1024_4183 = torch.constant.int 1024 %3701 = torch.prim.ListConstruct %int4_4182, %294, %int1024_4183 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3702 = torch.aten.view %3700, %3701 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3702, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_4184 = torch.constant.int 4 %int32_4185 = torch.constant.int 32 %int128_4186 = torch.constant.int 128 %3703 = torch.prim.ListConstruct %int4_4184, %294, %int32_4185, %int128_4186 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3704 = torch.aten.view %3688, %3703 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3704, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_4187 = torch.constant.int 4 %int8_4188 = torch.constant.int 8 %int128_4189 = torch.constant.int 128 %3705 = torch.prim.ListConstruct %int4_4187, %294, %int8_4188, %int128_4189 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3706 = torch.aten.view %3695, %3705 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3706, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_4190 = torch.constant.int 4 %int8_4191 = torch.constant.int 8 %int128_4192 = torch.constant.int 128 %3707 = torch.prim.ListConstruct %int4_4190, %294, %int8_4191, %int128_4192 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3708 = torch.aten.view %3702, %3707 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3708, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_4193 = torch.constant.int 131072 %none_4194 = torch.constant.none %none_4195 = torch.constant.none %cpu_4196 = torch.constant.device "cpu" %false_4197 = torch.constant.bool false %3709 = torch.aten.arange %int131072_4193, %none_4194, %none_4195, %cpu_4196, %false_4197 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_4198 = torch.constant.int 0 %int128_4199 = torch.constant.int 128 %int2_4200 = torch.constant.int 2 %none_4201 = torch.constant.none %none_4202 = torch.constant.none %cpu_4203 = torch.constant.device "cpu" %false_4204 = torch.constant.bool false %3710 = torch.aten.arange.start_step %int0_4198, %int128_4199, %int2_4200, %none_4201, %none_4202, %cpu_4203, %false_4204 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_4205 = torch.constant.int 0 %int0_4206 = torch.constant.int 0 %int64_4207 = torch.constant.int 64 %int1_4208 = torch.constant.int 1 %3711 = torch.aten.slice.Tensor %3710, %int0_4205, %int0_4206, %int64_4207, %int1_4208 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_4209 = torch.constant.int 6 %3712 = torch.prims.convert_element_type %3711, %int6_4209 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_4210 = torch.constant.int 128 %3713 = torch.aten.div.Scalar %3712, %int128_4210 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_4211 = torch.constant.float 5.000000e+05 %3714 = torch.aten.pow.Scalar %float5.000000e05_4211, 
%3713 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3715 = torch.aten.reciprocal %3714 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_4212 = torch.constant.float 1.000000e+00 %3716 = torch.aten.mul.Scalar %3715, %float1.000000e00_4212 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_4213 = torch.constant.int 131072 %int1_4214 = torch.constant.int 1 %3717 = torch.prim.ListConstruct %int131072_4213, %int1_4214 : (!torch.int, !torch.int) -> !torch.list<int> %3718 = torch.aten.view %3709, %3717 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3719 = torch.aten.mul.Tensor %3718, %3716 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %3720 = torch.aten.cos %3719 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3721 = torch.aten.sin %3719 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3722 = torch.aten.complex %3720, %3721 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_4215 = torch.constant.int 1 %3723 = torch.aten.size.int %3688, %int1_4215 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_4216 = torch.constant.int 0 %3724 = torch.aten.add.int %int0_4216, %3723 : !torch.int, !torch.int -> !torch.int %int0_4217 = torch.constant.int 0 %int0_4218 = torch.constant.int 0 %int1_4219 = torch.constant.int 1 %3725 = torch.aten.slice.Tensor %3722, %int0_4217, %int0_4218, %3724, %int1_4219 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3725, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_4220 = torch.constant.int 1 %int0_4221 = torch.constant.int 0 %int9223372036854775807_4222 = torch.constant.int 9223372036854775807 %int1_4223 = torch.constant.int 1 %3726 = torch.aten.slice.Tensor %3725, %int1_4220, %int0_4221, %int9223372036854775807_4222, %int1_4223 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3726, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_4224 = torch.constant.int 0 %3727 = torch.aten.unsqueeze %3726, %int0_4224 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3727, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_4225 = torch.constant.int 2 %3728 = torch.aten.unsqueeze %3727, %int2_4225 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3728, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_4226 = torch.constant.int 3 %int0_4227 = torch.constant.int 0 %int9223372036854775807_4228 = torch.constant.int 9223372036854775807 %int1_4229 = torch.constant.int 1 %3729 = torch.aten.slice.Tensor %3728, %int3_4226, %int0_4227, %int9223372036854775807_4228, %int1_4229 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3729, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3730 = torch_c.to_builtin_tensor %3704 : !torch.vtensor<[4,?,32,128],f16> -> 
tensor<4x?x32x128xf16> %c1_4230 = arith.constant 1 : index %dim_4231 = tensor.dim %3730, %c1_4230 : tensor<4x?x32x128xf16> %3731 = flow.tensor.bitcast %3730 : tensor<4x?x32x128xf16>{%dim_4231} -> tensor<4x?x32x64xcomplex<f16>>{%dim_4231} %3732 = torch_c.from_builtin_tensor %3731 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %3732, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %3733 = torch.aten.mul.Tensor %3732, %3729 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %3733, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %3734 = torch_c.to_builtin_tensor %3733 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_4232 = arith.constant 1 : index %dim_4233 = tensor.dim %3734, %c1_4232 : tensor<4x?x32x64xcomplex<f32>> %3735 = flow.tensor.bitcast %3734 : tensor<4x?x32x64xcomplex<f32>>{%dim_4233} -> tensor<4x?x32x128xf32>{%dim_4233} %3736 = torch_c.from_builtin_tensor %3735 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %3736, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_4234 = torch.constant.int 5 %3737 = torch.prims.convert_element_type %3736, %int5_4234 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3737, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_4235 = torch.constant.int 131072 %none_4236 = torch.constant.none %none_4237 = torch.constant.none %cpu_4238 = torch.constant.device "cpu" %false_4239 = torch.constant.bool false %3738 = torch.aten.arange %int131072_4235, %none_4236, %none_4237, %cpu_4238, %false_4239 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_4240 = torch.constant.int 0 %int128_4241 = torch.constant.int 128 %int2_4242 = torch.constant.int 2 %none_4243 = torch.constant.none %none_4244 = torch.constant.none %cpu_4245 = torch.constant.device "cpu" %false_4246 = torch.constant.bool false %3739 = torch.aten.arange.start_step %int0_4240, %int128_4241, %int2_4242, %none_4243, %none_4244, %cpu_4245, %false_4246 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_4247 = torch.constant.int 0 %int0_4248 = torch.constant.int 0 %int64_4249 = torch.constant.int 64 %int1_4250 = torch.constant.int 1 %3740 = torch.aten.slice.Tensor %3739, %int0_4247, %int0_4248, %int64_4249, %int1_4250 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_4251 = torch.constant.int 6 %3741 = torch.prims.convert_element_type %3740, %int6_4251 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_4252 = torch.constant.int 128 %3742 = torch.aten.div.Scalar %3741, %int128_4252 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_4253 = torch.constant.float 5.000000e+05 %3743 = torch.aten.pow.Scalar %float5.000000e05_4253, %3742 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3744 = torch.aten.reciprocal %3743 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_4254 = torch.constant.float 1.000000e+00 %3745 = torch.aten.mul.Scalar %3744, 
%float1.000000e00_4254 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_4255 = torch.constant.int 131072 %int1_4256 = torch.constant.int 1 %3746 = torch.prim.ListConstruct %int131072_4255, %int1_4256 : (!torch.int, !torch.int) -> !torch.list<int> %3747 = torch.aten.view %3738, %3746 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3748 = torch.aten.mul.Tensor %3747, %3745 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %3749 = torch.aten.cos %3748 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3750 = torch.aten.sin %3748 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3751 = torch.aten.complex %3749, %3750 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_4257 = torch.constant.int 1 %3752 = torch.aten.size.int %3695, %int1_4257 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_4258 = torch.constant.int 0 %3753 = torch.aten.add.int %int0_4258, %3752 : !torch.int, !torch.int -> !torch.int %int0_4259 = torch.constant.int 0 %int0_4260 = torch.constant.int 0 %int1_4261 = torch.constant.int 1 %3754 = torch.aten.slice.Tensor %3751, %int0_4259, %int0_4260, %3753, %int1_4261 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3754, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_4262 = torch.constant.int 1 %int0_4263 = torch.constant.int 0 %int9223372036854775807_4264 = torch.constant.int 9223372036854775807 %int1_4265 = torch.constant.int 1 %3755 = torch.aten.slice.Tensor %3754, %int1_4262, %int0_4263, %int9223372036854775807_4264, %int1_4265 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3755, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_4266 = torch.constant.int 0 %3756 = torch.aten.unsqueeze %3755, %int0_4266 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3756, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_4267 = torch.constant.int 2 %3757 = torch.aten.unsqueeze %3756, %int2_4267 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3757, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_4268 = torch.constant.int 3 %int0_4269 = torch.constant.int 0 %int9223372036854775807_4270 = torch.constant.int 9223372036854775807 %int1_4271 = torch.constant.int 1 %3758 = torch.aten.slice.Tensor %3757, %int3_4268, %int0_4269, %int9223372036854775807_4270, %int1_4271 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3758, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3759 = torch_c.to_builtin_tensor %3706 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_4272 = arith.constant 1 : index %dim_4273 = tensor.dim %3759, %c1_4272 : tensor<4x?x8x128xf16> %3760 = flow.tensor.bitcast %3759 : tensor<4x?x8x128xf16>{%dim_4273} -> tensor<4x?x8x64xcomplex<f16>>{%dim_4273} %3761 = 
torch_c.from_builtin_tensor %3760 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %3761, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %3762 = torch.aten.mul.Tensor %3761, %3758 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %3762, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %3763 = torch_c.to_builtin_tensor %3762 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_4274 = arith.constant 1 : index %dim_4275 = tensor.dim %3763, %c1_4274 : tensor<4x?x8x64xcomplex<f32>> %3764 = flow.tensor.bitcast %3763 : tensor<4x?x8x64xcomplex<f32>>{%dim_4275} -> tensor<4x?x8x128xf32>{%dim_4275} %3765 = torch_c.from_builtin_tensor %3764 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %3765, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_4276 = torch.constant.int 5 %3766 = torch.prims.convert_element_type %3765, %int5_4276 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3766, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_4277 = torch.constant.int 64 %3767 = torch.aten.mul.Scalar %arg2, %int64_4277 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3767, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int34 = torch.constant.int 34 %int1_4278 = torch.constant.int 1 %3768 = torch.aten.add.Scalar %3767, %int34, %int1_4278 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3768, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_4279 = torch.constant.int 4 %int32_4280 = torch.constant.int 32 %int8_4281 = torch.constant.int 8 %int128_4282 = torch.constant.int 128 %3769 = torch.prim.ListConstruct %int4_4279, %425, %int32_4280, %int8_4281, %int128_4282 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3770 = torch.aten.view %3766, %3769 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3770, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_4283 = torch.constant.int 4 %3771 = torch.aten.mul.int %int4_4283, %425 : !torch.int, !torch.int -> !torch.int %int32_4284 = torch.constant.int 32 %int8_4285 = torch.constant.int 8 %int128_4286 = torch.constant.int 128 %3772 = torch.prim.ListConstruct %3771, %int32_4284, %int8_4285, %int128_4286 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3773 = torch.aten.view %3770, %3772 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3773, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_4287 = torch.constant.int 4 %3774 = torch.aten.mul.int %int4_4287, %425 : !torch.int, !torch.int -> !torch.int %3775 = torch.prim.ListConstruct %3774 : (!torch.int) -> !torch.list<int> %3776 = torch.aten.view %3768, %3775 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3776, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_4288 = torch.constant.int 32 
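// NOTE (annotation): above, the projections are split into heads (Q: [4,?,32,128],
// K/V: [4,?,8,128]) and rotary position embeddings are applied by bitcasting each
// 128-wide head into 64 complex<f16> pairs and multiplying by cis(p * theta_j),
// where theta_j = 500000^(-2j/128) and p comes from the arange(131072) position
// table; %3737 is the rotated Q and %3766 the rotated K. Below, the rotated K is
// scattered into the paged KV cache %3610 (viewed as [pages, 32, 2, 32, 8, 128],
// i.e. pages x layers x K/V x tokens x heads x dim) with index_put; the slot index
// arg2 * 64 + 34 is consistent with this being block 17 of a 32-layer model
// (2 slots per layer: 34 = K, 35 = V), though the block number is inferred, not
// stated in the IR.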
%int2_4289 = torch.constant.int 2 %int32_4290 = torch.constant.int 32 %int8_4291 = torch.constant.int 8 %int128_4292 = torch.constant.int 128 %3777 = torch.prim.ListConstruct %416, %int32_4288, %int2_4289, %int32_4290, %int8_4291, %int128_4292 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3778 = torch.aten.view %3610, %3777 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3778, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_4293 = torch.constant.int 32 %3779 = torch.aten.mul.int %416, %int32_4293 : !torch.int, !torch.int -> !torch.int %int2_4294 = torch.constant.int 2 %3780 = torch.aten.mul.int %3779, %int2_4294 : !torch.int, !torch.int -> !torch.int %int32_4295 = torch.constant.int 32 %int8_4296 = torch.constant.int 8 %int128_4297 = torch.constant.int 128 %3781 = torch.prim.ListConstruct %3780, %int32_4295, %int8_4296, %int128_4297 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3782 = torch.aten.view %3778, %3781 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3782, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %3783 = torch.prim.ListConstruct %3776 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_4298 = torch.constant.bool false %3784 = torch.aten.index_put %3782, %3783, %3773, %false_4298 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3784, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_4299 = torch.constant.int 32 %int2_4300 = torch.constant.int 2 %int32_4301 = torch.constant.int 32 %int8_4302 = torch.constant.int 8 %int128_4303 = torch.constant.int 128 %3785 = torch.prim.ListConstruct %416, %int32_4299, %int2_4300, %int32_4301, %int8_4302, %int128_4303 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3786 = torch.aten.view %3784, %3785 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3786, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_4304 = torch.constant.int 2097152 %3787 = torch.prim.ListConstruct %416, %int2097152_4304 : (!torch.int, !torch.int) -> !torch.list<int> %3788 = torch.aten.view %3786, %3787 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3788, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_4305 = torch.constant.int 32 %int2_4306 = torch.constant.int 2 %int32_4307 = torch.constant.int 32 %int8_4308 = torch.constant.int 8 %int128_4309 = torch.constant.int 128 %3789 = torch.prim.ListConstruct %416, %int32_4305, %int2_4306, %int32_4307, %int8_4308, %int128_4309 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3790 = torch.aten.view %3788, %3789 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3790, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_4310 = torch.constant.int 32 %int8_4311 = torch.constant.int 8 %int128_4312 
= torch.constant.int 128 %3791 = torch.prim.ListConstruct %3780, %int32_4310, %int8_4311, %int128_4312 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3792 = torch.aten.view %3790, %3791 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3792, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_4313 = torch.constant.int 4 %int32_4314 = torch.constant.int 32 %int8_4315 = torch.constant.int 8 %int128_4316 = torch.constant.int 128 %3793 = torch.prim.ListConstruct %int4_4313, %425, %int32_4314, %int8_4315, %int128_4316 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3794 = torch.aten.view %3708, %3793 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3794, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_4317 = torch.constant.int 4 %3795 = torch.aten.mul.int %int4_4317, %425 : !torch.int, !torch.int -> !torch.int %int32_4318 = torch.constant.int 32 %int8_4319 = torch.constant.int 8 %int128_4320 = torch.constant.int 128 %3796 = torch.prim.ListConstruct %3795, %int32_4318, %int8_4319, %int128_4320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3797 = torch.aten.view %3794, %3796 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3797, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_4321 = torch.constant.int 1 %int1_4322 = torch.constant.int 1 %3798 = torch.aten.add.Scalar %3768, %int1_4321, %int1_4322 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3798, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_4323 = torch.constant.int 4 %3799 = torch.aten.mul.int %int4_4323, %425 : !torch.int, !torch.int -> !torch.int %3800 = torch.prim.ListConstruct %3799 : (!torch.int) -> !torch.list<int> %3801 = torch.aten.view %3798, %3800 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3801, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %3802 = torch.prim.ListConstruct %3801 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_4324 = torch.constant.bool false %3803 = torch.aten.index_put %3792, %3802, %3797, %false_4324 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3803, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_4325 = torch.constant.int 32 %int2_4326 = torch.constant.int 2 %int32_4327 = torch.constant.int 32 %int8_4328 = torch.constant.int 8 %int128_4329 = torch.constant.int 128 %3804 = torch.prim.ListConstruct %416, %int32_4325, %int2_4326, %int32_4327, %int8_4328, %int128_4329 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3805 = torch.aten.view %3803, %3804 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3805, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_4330 = torch.constant.int 2097152 %3806 = torch.prim.ListConstruct %416, %int2097152_4330 : (!torch.int, 
!torch.int) -> !torch.list<int> %3807 = torch.aten.view %3805, %3806 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3807, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_4331 = torch.constant.int -2 %3808 = torch.aten.unsqueeze %3766, %int-2_4331 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %3808, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_4332 = torch.constant.int 4 %int8_4333 = torch.constant.int 8 %int4_4334 = torch.constant.int 4 %int128_4335 = torch.constant.int 128 %3809 = torch.prim.ListConstruct %int4_4332, %3752, %int8_4333, %int4_4334, %int128_4335 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_4336 = torch.constant.bool false %3810 = torch.aten.expand %3808, %3809, %false_4336 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3810, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_4337 = torch.constant.int 0 %3811 = torch.aten.clone %3810, %int0_4337 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3811, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_4338 = torch.constant.int 4 %int32_4339 = torch.constant.int 32 %int128_4340 = torch.constant.int 128 %3812 = torch.prim.ListConstruct %int4_4338, %3752, %int32_4339, %int128_4340 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3813 = torch.aten._unsafe_view %3811, %3812 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3813, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_4341 = torch.constant.int -2 %3814 = torch.aten.unsqueeze %3708, %int-2_4341 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %3814, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_4342 = torch.constant.int 1 %3815 = torch.aten.size.int %3702, %int1_4342 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_4343 = torch.constant.int 4 %int8_4344 = torch.constant.int 8 %int4_4345 = torch.constant.int 4 %int128_4346 = torch.constant.int 128 %3816 = torch.prim.ListConstruct %int4_4343, %3815, %int8_4344, %int4_4345, %int128_4346 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_4347 = torch.constant.bool false %3817 = torch.aten.expand %3814, %3816, %false_4347 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3817, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_4348 = torch.constant.int 0 %3818 = torch.aten.clone %3817, %int0_4348 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %3818, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_4349 = torch.constant.int 4 %int32_4350 = torch.constant.int 32 %int128_4351 = torch.constant.int 128 %3819 = torch.prim.ListConstruct %int4_4349, %3815, %int32_4350, %int128_4351 : (!torch.int, !torch.int, 
!torch.int, !torch.int) -> !torch.list<int> %3820 = torch.aten._unsafe_view %3818, %3819 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3820, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_4352 = torch.constant.int 1 %int2_4353 = torch.constant.int 2 %3821 = torch.aten.transpose.int %3737, %int1_4352, %int2_4353 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3821, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4354 = torch.constant.int 1 %int2_4355 = torch.constant.int 2 %3822 = torch.aten.transpose.int %3813, %int1_4354, %int2_4355 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3822, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4356 = torch.constant.int 1 %int2_4357 = torch.constant.int 2 %3823 = torch.aten.transpose.int %3820, %int1_4356, %int2_4357 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %3823, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_4358 = torch.constant.float 0.000000e+00 %false_4359 = torch.constant.bool false %none_4360 = torch.constant.none %3824:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3821, %3822, %3823, %float0.000000e00_4358, %false_4359, %320, %none_4360) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %3824#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4361 = torch.constant.int 1 %int2_4362 = torch.constant.int 2 %3825 = torch.aten.transpose.int %3824#0, %int1_4361, %int2_4362 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3825, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_4363 = torch.constant.int 4 %int4096_4364 = torch.constant.int 4096 %3826 = torch.prim.ListConstruct %int4_4363, %3723, %int4096_4364 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3827 = torch.aten.view %3825, %3826 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3827, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4365 = torch.constant.int -2 %int-1_4366 = torch.constant.int -1 %3828 = torch.aten.transpose.int %158, %int-2_4365, %int-1_4366 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_4367 = torch.constant.int 4 %3829 = torch.aten.mul.int %int4_4367, %3723 : !torch.int, !torch.int -> !torch.int %int4096_4368 = torch.constant.int 4096 %3830 = torch.prim.ListConstruct %3829, %int4096_4368 : (!torch.int, !torch.int) -> !torch.list<int> %3831 = torch.aten.view %3827, %3830 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3831, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3832 = torch.aten.mm %3831, %3828 : 
!torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3832, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4369 = torch.constant.int 4 %int4096_4370 = torch.constant.int 4096 %3833 = torch.prim.ListConstruct %int4_4369, %3723, %int4096_4370 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3834 = torch.aten.view %3832, %3833 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3834, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_4371 = torch.constant.int 1 %3835 = torch.aten.add.Tensor %3672, %3834, %int1_4371 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3835, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_4372 = torch.constant.int 6 %3836 = torch.prims.convert_element_type %3835, %int6_4372 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3836, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_4373 = torch.constant.int 2 %3837 = torch.aten.pow.Tensor_Scalar %3836, %int2_4373 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3837, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_4374 = torch.constant.int -1 %3838 = torch.prim.ListConstruct %int-1_4374 : (!torch.int) -> !torch.list<int> %true_4375 = torch.constant.bool true %none_4376 = torch.constant.none %3839 = torch.aten.mean.dim %3837, %3838, %true_4375, %none_4376 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3839, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_4377 = torch.constant.float 9.9999997473787516E-6 %int1_4378 = torch.constant.int 1 %3840 = torch.aten.add.Scalar %3839, %float9.999990e-06_4377, %int1_4378 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3840, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3841 = torch.aten.rsqrt %3840 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3841, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3842 = torch.aten.mul.Tensor %3836, %3841 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3842, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3843 = torch.aten.mul.Tensor %159, %3842 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3843, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_4379 = torch.constant.int 5 %3844 = torch.prims.convert_element_type %3843, %int5_4379 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3844, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4380 = torch.constant.int -2 %int-1_4381 = torch.constant.int -1 %3845 = torch.aten.transpose.int %160, %int-2_4380, %int-1_4381 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int 
-> !torch.vtensor<[4096,14336],f16> %int4_4382 = torch.constant.int 4 %3846 = torch.aten.mul.int %int4_4382, %294 : !torch.int, !torch.int -> !torch.int %int4096_4383 = torch.constant.int 4096 %3847 = torch.prim.ListConstruct %3846, %int4096_4383 : (!torch.int, !torch.int) -> !torch.list<int> %3848 = torch.aten.view %3844, %3847 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3848, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3849 = torch.aten.mm %3848, %3845 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3849, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_4384 = torch.constant.int 4 %int14336_4385 = torch.constant.int 14336 %3850 = torch.prim.ListConstruct %int4_4384, %294, %int14336_4385 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3851 = torch.aten.view %3849, %3850 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3851, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3852 = torch.aten.silu %3851 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3852, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_4386 = torch.constant.int -2 %int-1_4387 = torch.constant.int -1 %3853 = torch.aten.transpose.int %161, %int-2_4386, %int-1_4387 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_4388 = torch.constant.int 4 %3854 = torch.aten.mul.int %int4_4388, %294 : !torch.int, !torch.int -> !torch.int %int4096_4389 = torch.constant.int 4096 %3855 = torch.prim.ListConstruct %3854, %int4096_4389 : (!torch.int, !torch.int) -> !torch.list<int> %3856 = torch.aten.view %3844, %3855 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3856, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3857 = torch.aten.mm %3856, %3853 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3857, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_4390 = torch.constant.int 4 %int14336_4391 = torch.constant.int 14336 %3858 = torch.prim.ListConstruct %int4_4390, %294, %int14336_4391 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3859 = torch.aten.view %3857, %3858 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3859, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %3860 = torch.aten.mul.Tensor %3852, %3859 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %3860, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_4392 = torch.constant.int -2 %int-1_4393 = torch.constant.int -1 %3861 = torch.aten.transpose.int %162, %int-2_4392, %int-1_4393 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_4394 = torch.constant.int 1 %3862 = torch.aten.size.int %3851, %int1_4394 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_4395 = torch.constant.int 4 %3863 = torch.aten.mul.int %int4_4395, %3862 : 
!torch.int, !torch.int -> !torch.int %int14336_4396 = torch.constant.int 14336 %3864 = torch.prim.ListConstruct %3863, %int14336_4396 : (!torch.int, !torch.int) -> !torch.list<int> %3865 = torch.aten.view %3860, %3864 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %3865, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %3866 = torch.aten.mm %3865, %3861 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3866, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4397 = torch.constant.int 4 %int4096_4398 = torch.constant.int 4096 %3867 = torch.prim.ListConstruct %int4_4397, %3862, %int4096_4398 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3868 = torch.aten.view %3866, %3867 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3868, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_4399 = torch.constant.int 1 %3869 = torch.aten.add.Tensor %3835, %3868, %int1_4399 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3869, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_4400 = torch.constant.int 6 %3870 = torch.prims.convert_element_type %3869, %int6_4400 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3870, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_4401 = torch.constant.int 2 %3871 = torch.aten.pow.Tensor_Scalar %3870, %int2_4401 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3871, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_4402 = torch.constant.int -1 %3872 = torch.prim.ListConstruct %int-1_4402 : (!torch.int) -> !torch.list<int> %true_4403 = torch.constant.bool true %none_4404 = torch.constant.none %3873 = torch.aten.mean.dim %3871, %3872, %true_4403, %none_4404 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3873, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_4405 = torch.constant.float 9.9999997473787516E-6 %int1_4406 = torch.constant.int 1 %3874 = torch.aten.add.Scalar %3873, %float9.999990e-06_4405, %int1_4406 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3874, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3875 = torch.aten.rsqrt %3874 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %3875, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %3876 = torch.aten.mul.Tensor %3870, %3875 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3876, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %3877 = torch.aten.mul.Tensor %163, %3876 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %3877, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_4407 = 
torch.constant.int 5 %3878 = torch.prims.convert_element_type %3877, %int5_4407 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3878, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4408 = torch.constant.int -2 %int-1_4409 = torch.constant.int -1 %3879 = torch.aten.transpose.int %164, %int-2_4408, %int-1_4409 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_4410 = torch.constant.int 4 %3880 = torch.aten.mul.int %int4_4410, %294 : !torch.int, !torch.int -> !torch.int %int4096_4411 = torch.constant.int 4096 %3881 = torch.prim.ListConstruct %3880, %int4096_4411 : (!torch.int, !torch.int) -> !torch.list<int> %3882 = torch.aten.view %3878, %3881 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3882, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3883 = torch.aten.mm %3882, %3879 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3883, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4412 = torch.constant.int 4 %int4096_4413 = torch.constant.int 4096 %3884 = torch.prim.ListConstruct %int4_4412, %294, %int4096_4413 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3885 = torch.aten.view %3883, %3884 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %3885, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4414 = torch.constant.int -2 %int-1_4415 = torch.constant.int -1 %3886 = torch.aten.transpose.int %165, %int-2_4414, %int-1_4415 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_4416 = torch.constant.int 4 %3887 = torch.aten.mul.int %int4_4416, %294 : !torch.int, !torch.int -> !torch.int %int4096_4417 = torch.constant.int 4096 %3888 = torch.prim.ListConstruct %3887, %int4096_4417 : (!torch.int, !torch.int) -> !torch.list<int> %3889 = torch.aten.view %3878, %3888 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3889, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3890 = torch.aten.mm %3889, %3886 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3890, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_4418 = torch.constant.int 4 %int1024_4419 = torch.constant.int 1024 %3891 = torch.prim.ListConstruct %int4_4418, %294, %int1024_4419 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3892 = torch.aten.view %3890, %3891 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3892, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_4420 = torch.constant.int -2 %int-1_4421 = torch.constant.int -1 %3893 = torch.aten.transpose.int %166, %int-2_4420, %int-1_4421 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_4422 = torch.constant.int 4 %3894 = torch.aten.mul.int %int4_4422, %294 : !torch.int, !torch.int -> !torch.int %int4096_4423 = torch.constant.int 4096 %3895 = torch.prim.ListConstruct %3894, %int4096_4423 : (!torch.int, !torch.int) -> !torch.list<int> %3896 = 
torch.aten.view %3878, %3895 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %3896, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %3897 = torch.aten.mm %3896, %3893 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %3897, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_4424 = torch.constant.int 4 %int1024_4425 = torch.constant.int 1024 %3898 = torch.prim.ListConstruct %int4_4424, %294, %int1024_4425 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %3899 = torch.aten.view %3897, %3898 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %3899, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_4426 = torch.constant.int 4 %int32_4427 = torch.constant.int 32 %int128_4428 = torch.constant.int 128 %3900 = torch.prim.ListConstruct %int4_4426, %294, %int32_4427, %int128_4428 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3901 = torch.aten.view %3885, %3900 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3901, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_4429 = torch.constant.int 4 %int8_4430 = torch.constant.int 8 %int128_4431 = torch.constant.int 128 %3902 = torch.prim.ListConstruct %int4_4429, %294, %int8_4430, %int128_4431 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3903 = torch.aten.view %3892, %3902 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3903, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_4432 = torch.constant.int 4 %int8_4433 = torch.constant.int 8 %int128_4434 = torch.constant.int 128 %3904 = torch.prim.ListConstruct %int4_4432, %294, %int8_4433, %int128_4434 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3905 = torch.aten.view %3899, %3904 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3905, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_4435 = torch.constant.int 131072 %none_4436 = torch.constant.none %none_4437 = torch.constant.none %cpu_4438 = torch.constant.device "cpu" %false_4439 = torch.constant.bool false %3906 = torch.aten.arange %int131072_4435, %none_4436, %none_4437, %cpu_4438, %false_4439 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_4440 = torch.constant.int 0 %int128_4441 = torch.constant.int 128 %int2_4442 = torch.constant.int 2 %none_4443 = torch.constant.none %none_4444 = torch.constant.none %cpu_4445 = torch.constant.device "cpu" %false_4446 = torch.constant.bool false %3907 = torch.aten.arange.start_step %int0_4440, %int128_4441, %int2_4442, %none_4443, %none_4444, %cpu_4445, %false_4446 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_4447 = torch.constant.int 0 %int0_4448 = torch.constant.int 0 %int64_4449 = torch.constant.int 64 %int1_4450 = torch.constant.int 1 %3908 = torch.aten.slice.Tensor %3907, %int0_4447, %int0_4448, %int64_4449, %int1_4450 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, 
!torch.int -> !torch.vtensor<[64],si64> %int6_4451 = torch.constant.int 6 %3909 = torch.prims.convert_element_type %3908, %int6_4451 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_4452 = torch.constant.int 128 %3910 = torch.aten.div.Scalar %3909, %int128_4452 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_4453 = torch.constant.float 5.000000e+05 %3911 = torch.aten.pow.Scalar %float5.000000e05_4453, %3910 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3912 = torch.aten.reciprocal %3911 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_4454 = torch.constant.float 1.000000e+00 %3913 = torch.aten.mul.Scalar %3912, %float1.000000e00_4454 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_4455 = torch.constant.int 131072 %int1_4456 = torch.constant.int 1 %3914 = torch.prim.ListConstruct %int131072_4455, %int1_4456 : (!torch.int, !torch.int) -> !torch.list<int> %3915 = torch.aten.view %3906, %3914 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3916 = torch.aten.mul.Tensor %3915, %3913 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %3917 = torch.aten.cos %3916 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3918 = torch.aten.sin %3916 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3919 = torch.aten.complex %3917, %3918 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_4457 = torch.constant.int 1 %3920 = torch.aten.size.int %3885, %int1_4457 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_4458 = torch.constant.int 0 %3921 = torch.aten.add.int %int0_4458, %3920 : !torch.int, !torch.int -> !torch.int %int0_4459 = torch.constant.int 0 %int0_4460 = torch.constant.int 0 %int1_4461 = torch.constant.int 1 %3922 = torch.aten.slice.Tensor %3919, %int0_4459, %int0_4460, %3921, %int1_4461 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3922, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_4462 = torch.constant.int 1 %int0_4463 = torch.constant.int 0 %int9223372036854775807_4464 = torch.constant.int 9223372036854775807 %int1_4465 = torch.constant.int 1 %3923 = torch.aten.slice.Tensor %3922, %int1_4462, %int0_4463, %int9223372036854775807_4464, %int1_4465 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3923, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_4466 = torch.constant.int 0 %3924 = torch.aten.unsqueeze %3923, %int0_4466 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3924, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_4467 = torch.constant.int 2 %3925 = torch.aten.unsqueeze %3924, %int2_4467 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3925, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_4468 = torch.constant.int 3 %int0_4469 = torch.constant.int 0 %int9223372036854775807_4470 = torch.constant.int 9223372036854775807 
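// NOTE (annotation): above, K and V are broadcast from 8 to 32 heads
// (unsqueeze / expand / clone / _unsafe_view), scaled-dot-product flash attention
// runs over [4,32,?,128] Q/K/V with the [4,1,?,?] mask %320 (%3824), and the
// result goes through the output projection (%3832) plus residual add (%3835).
// Then the FFN RMSNorm and a SwiGLU feed-forward follow: silu(gate) * up through
// the 14336-wide projections, down back to 4096, and another residual (%3869).
// The remaining ops begin the next block: its attention RMSNorm (%3878), its
// Q/K/V projections (%3885 / %3892 / %3899), and a rebuild of the same RoPE
// table (base 500000, head dim 128, max length 131072).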
%int1_4471 = torch.constant.int 1 %3926 = torch.aten.slice.Tensor %3925, %int3_4468, %int0_4469, %int9223372036854775807_4470, %int1_4471 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3926, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3927 = torch_c.to_builtin_tensor %3901 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_4472 = arith.constant 1 : index %dim_4473 = tensor.dim %3927, %c1_4472 : tensor<4x?x32x128xf16> %3928 = flow.tensor.bitcast %3927 : tensor<4x?x32x128xf16>{%dim_4473} -> tensor<4x?x32x64xcomplex<f16>>{%dim_4473} %3929 = torch_c.from_builtin_tensor %3928 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %3929, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %3930 = torch.aten.mul.Tensor %3929, %3926 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %3930, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %3931 = torch_c.to_builtin_tensor %3930 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_4474 = arith.constant 1 : index %dim_4475 = tensor.dim %3931, %c1_4474 : tensor<4x?x32x64xcomplex<f32>> %3932 = flow.tensor.bitcast %3931 : tensor<4x?x32x64xcomplex<f32>>{%dim_4475} -> tensor<4x?x32x128xf32>{%dim_4475} %3933 = torch_c.from_builtin_tensor %3932 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %3933, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_4476 = torch.constant.int 5 %3934 = torch.prims.convert_element_type %3933, %int5_4476 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %3934, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_4477 = torch.constant.int 131072 %none_4478 = torch.constant.none %none_4479 = torch.constant.none %cpu_4480 = torch.constant.device "cpu" %false_4481 = torch.constant.bool false %3935 = torch.aten.arange %int131072_4477, %none_4478, %none_4479, %cpu_4480, %false_4481 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_4482 = torch.constant.int 0 %int128_4483 = torch.constant.int 128 %int2_4484 = torch.constant.int 2 %none_4485 = torch.constant.none %none_4486 = torch.constant.none %cpu_4487 = torch.constant.device "cpu" %false_4488 = torch.constant.bool false %3936 = torch.aten.arange.start_step %int0_4482, %int128_4483, %int2_4484, %none_4485, %none_4486, %cpu_4487, %false_4488 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_4489 = torch.constant.int 0 %int0_4490 = torch.constant.int 0 %int64_4491 = torch.constant.int 64 %int1_4492 = torch.constant.int 1 %3937 = torch.aten.slice.Tensor %3936, %int0_4489, %int0_4490, %int64_4491, %int1_4492 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_4493 = torch.constant.int 6 %3938 = torch.prims.convert_element_type %3937, %int6_4493 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_4494 = torch.constant.int 128 %3939 = torch.aten.div.Scalar %3938, 
%int128_4494 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_4495 = torch.constant.float 5.000000e+05 %3940 = torch.aten.pow.Scalar %float5.000000e05_4495, %3939 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %3941 = torch.aten.reciprocal %3940 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_4496 = torch.constant.float 1.000000e+00 %3942 = torch.aten.mul.Scalar %3941, %float1.000000e00_4496 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_4497 = torch.constant.int 131072 %int1_4498 = torch.constant.int 1 %3943 = torch.prim.ListConstruct %int131072_4497, %int1_4498 : (!torch.int, !torch.int) -> !torch.list<int> %3944 = torch.aten.view %3935, %3943 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %3945 = torch.aten.mul.Tensor %3944, %3942 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %3946 = torch.aten.cos %3945 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3947 = torch.aten.sin %3945 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %3948 = torch.aten.complex %3946, %3947 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_4499 = torch.constant.int 1 %3949 = torch.aten.size.int %3892, %int1_4499 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_4500 = torch.constant.int 0 %3950 = torch.aten.add.int %int0_4500, %3949 : !torch.int, !torch.int -> !torch.int %int0_4501 = torch.constant.int 0 %int0_4502 = torch.constant.int 0 %int1_4503 = torch.constant.int 1 %3951 = torch.aten.slice.Tensor %3948, %int0_4501, %int0_4502, %3950, %int1_4503 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3951, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_4504 = torch.constant.int 1 %int0_4505 = torch.constant.int 0 %int9223372036854775807_4506 = torch.constant.int 9223372036854775807 %int1_4507 = torch.constant.int 1 %3952 = torch.aten.slice.Tensor %3951, %int1_4504, %int0_4505, %int9223372036854775807_4506, %int1_4507 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %3952, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_4508 = torch.constant.int 0 %3953 = torch.aten.unsqueeze %3952, %int0_4508 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %3953, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_4509 = torch.constant.int 2 %3954 = torch.aten.unsqueeze %3953, %int2_4509 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %3954, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_4510 = torch.constant.int 3 %int0_4511 = torch.constant.int 0 %int9223372036854775807_4512 = torch.constant.int 9223372036854775807 %int1_4513 = torch.constant.int 1 %3955 = torch.aten.slice.Tensor %3954, %int3_4510, %int0_4511, %int9223372036854775807_4512, %int1_4513 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> 
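// Rotary position embedding (RoPE) applied to the key projection. The table built
// above is cos(a) + i*sin(a) with a = position / 500000^(2i/128) (rope theta 5.0e5,
// i over half of the 128 head dim), sliced to the current sequence length and
// broadcast to [1, seq, 1, 64]. The ops below bitcast the f16 keys [4, seq, 8, 128]
// to 64 complex<f16> pairs per head, multiply by this table, and bitcast back to f16,
// mirroring the query-side rotation computed earlier.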
torch.bind_symbolic_shape %3955, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %3956 = torch_c.to_builtin_tensor %3903 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_4514 = arith.constant 1 : index %dim_4515 = tensor.dim %3956, %c1_4514 : tensor<4x?x8x128xf16> %3957 = flow.tensor.bitcast %3956 : tensor<4x?x8x128xf16>{%dim_4515} -> tensor<4x?x8x64xcomplex<f16>>{%dim_4515} %3958 = torch_c.from_builtin_tensor %3957 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %3958, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %3959 = torch.aten.mul.Tensor %3958, %3955 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %3959, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %3960 = torch_c.to_builtin_tensor %3959 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_4516 = arith.constant 1 : index %dim_4517 = tensor.dim %3960, %c1_4516 : tensor<4x?x8x64xcomplex<f32>> %3961 = flow.tensor.bitcast %3960 : tensor<4x?x8x64xcomplex<f32>>{%dim_4517} -> tensor<4x?x8x128xf32>{%dim_4517} %3962 = torch_c.from_builtin_tensor %3961 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %3962, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_4518 = torch.constant.int 5 %3963 = torch.prims.convert_element_type %3962, %int5_4518 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %3963, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_4519 = torch.constant.int 64 %3964 = torch.aten.mul.Scalar %arg2, %int64_4519 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3964, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int36 = torch.constant.int 36 %int1_4520 = torch.constant.int 1 %3965 = torch.aten.add.Scalar %3964, %int36, %int1_4520 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3965, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_4521 = torch.constant.int 4 %int32_4522 = torch.constant.int 32 %int8_4523 = torch.constant.int 8 %int128_4524 = torch.constant.int 128 %3966 = torch.prim.ListConstruct %int4_4521, %425, %int32_4522, %int8_4523, %int128_4524 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3967 = torch.aten.view %3963, %3966 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3967, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_4525 = torch.constant.int 4 %3968 = torch.aten.mul.int %int4_4525, %425 : !torch.int, !torch.int -> !torch.int %int32_4526 = torch.constant.int 32 %int8_4527 = torch.constant.int 8 %int128_4528 = torch.constant.int 128 %3969 = torch.prim.ListConstruct %3968, %int32_4526, %int8_4527, %int128_4528 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3970 = torch.aten.view %3967, %3969 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3970, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : 
!torch.vtensor<[?,32,8,128],f16> %int4_4529 = torch.constant.int 4 %3971 = torch.aten.mul.int %int4_4529, %425 : !torch.int, !torch.int -> !torch.int %3972 = torch.prim.ListConstruct %3971 : (!torch.int) -> !torch.list<int> %3973 = torch.aten.view %3965, %3972 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3973, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_4530 = torch.constant.int 32 %int2_4531 = torch.constant.int 2 %int32_4532 = torch.constant.int 32 %int8_4533 = torch.constant.int 8 %int128_4534 = torch.constant.int 128 %3974 = torch.prim.ListConstruct %416, %int32_4530, %int2_4531, %int32_4532, %int8_4533, %int128_4534 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3975 = torch.aten.view %3807, %3974 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3975, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_4535 = torch.constant.int 32 %3976 = torch.aten.mul.int %416, %int32_4535 : !torch.int, !torch.int -> !torch.int %int2_4536 = torch.constant.int 2 %3977 = torch.aten.mul.int %3976, %int2_4536 : !torch.int, !torch.int -> !torch.int %int32_4537 = torch.constant.int 32 %int8_4538 = torch.constant.int 8 %int128_4539 = torch.constant.int 128 %3978 = torch.prim.ListConstruct %3977, %int32_4537, %int8_4538, %int128_4539 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3979 = torch.aten.view %3975, %3978 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3979, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %3980 = torch.prim.ListConstruct %3973 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_4540 = torch.constant.bool false %3981 = torch.aten.index_put %3979, %3980, %3970, %false_4540 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3981, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_4541 = torch.constant.int 32 %int2_4542 = torch.constant.int 2 %int32_4543 = torch.constant.int 32 %int8_4544 = torch.constant.int 8 %int128_4545 = torch.constant.int 128 %3982 = torch.prim.ListConstruct %416, %int32_4541, %int2_4542, %int32_4543, %int8_4544, %int128_4545 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3983 = torch.aten.view %3981, %3982 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3983, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_4546 = torch.constant.int 2097152 %3984 = torch.prim.ListConstruct %416, %int2097152_4546 : (!torch.int, !torch.int) -> !torch.list<int> %3985 = torch.aten.view %3983, %3984 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %3985, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_4547 = torch.constant.int 32 %int2_4548 = torch.constant.int 2 %int32_4549 = torch.constant.int 32 %int8_4550 = torch.constant.int 8 %int128_4551 = torch.constant.int 128 %3986 = torch.prim.ListConstruct %416, %int32_4547, %int2_4548, 
%int32_4549, %int8_4550, %int128_4551 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3987 = torch.aten.view %3985, %3986 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %3987, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_4552 = torch.constant.int 32 %int8_4553 = torch.constant.int 8 %int128_4554 = torch.constant.int 128 %3988 = torch.prim.ListConstruct %3977, %int32_4552, %int8_4553, %int128_4554 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3989 = torch.aten.view %3987, %3988 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3989, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_4555 = torch.constant.int 4 %int32_4556 = torch.constant.int 32 %int8_4557 = torch.constant.int 8 %int128_4558 = torch.constant.int 128 %3990 = torch.prim.ListConstruct %int4_4555, %425, %int32_4556, %int8_4557, %int128_4558 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3991 = torch.aten.view %3905, %3990 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %3991, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_4559 = torch.constant.int 4 %3992 = torch.aten.mul.int %int4_4559, %425 : !torch.int, !torch.int -> !torch.int %int32_4560 = torch.constant.int 32 %int8_4561 = torch.constant.int 8 %int128_4562 = torch.constant.int 128 %3993 = torch.prim.ListConstruct %3992, %int32_4560, %int8_4561, %int128_4562 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %3994 = torch.aten.view %3991, %3993 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %3994, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_4563 = torch.constant.int 1 %int1_4564 = torch.constant.int 1 %3995 = torch.aten.add.Scalar %3965, %int1_4563, %int1_4564 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %3995, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_4565 = torch.constant.int 4 %3996 = torch.aten.mul.int %int4_4565, %425 : !torch.int, !torch.int -> !torch.int %3997 = torch.prim.ListConstruct %3996 : (!torch.int) -> !torch.list<int> %3998 = torch.aten.view %3995, %3997 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %3998, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %3999 = torch.prim.ListConstruct %3998 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_4566 = torch.constant.bool false %4000 = torch.aten.index_put %3989, %3999, %3994, %false_4566 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4000, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_4567 = torch.constant.int 32 %int2_4568 = torch.constant.int 2 %int32_4569 = torch.constant.int 32 %int8_4570 = torch.constant.int 8 %int128_4571 = torch.constant.int 128 %4001 = torch.prim.ListConstruct %416, %int32_4567, %int2_4568, %int32_4569, %int8_4570, %int128_4571 : 
(!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4002 = torch.aten.view %4000, %4001 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4002, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_4572 = torch.constant.int 2097152 %4003 = torch.prim.ListConstruct %416, %int2097152_4572 : (!torch.int, !torch.int) -> !torch.list<int> %4004 = torch.aten.view %4002, %4003 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4004, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_4573 = torch.constant.int -2 %4005 = torch.aten.unsqueeze %3963, %int-2_4573 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4005, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_4574 = torch.constant.int 4 %int8_4575 = torch.constant.int 8 %int4_4576 = torch.constant.int 4 %int128_4577 = torch.constant.int 128 %4006 = torch.prim.ListConstruct %int4_4574, %3949, %int8_4575, %int4_4576, %int128_4577 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_4578 = torch.constant.bool false %4007 = torch.aten.expand %4005, %4006, %false_4578 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4007, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_4579 = torch.constant.int 0 %4008 = torch.aten.clone %4007, %int0_4579 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4008, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_4580 = torch.constant.int 4 %int32_4581 = torch.constant.int 32 %int128_4582 = torch.constant.int 128 %4009 = torch.prim.ListConstruct %int4_4580, %3949, %int32_4581, %int128_4582 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4010 = torch.aten._unsafe_view %4008, %4009 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4010, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_4583 = torch.constant.int -2 %4011 = torch.aten.unsqueeze %3905, %int-2_4583 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4011, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_4584 = torch.constant.int 1 %4012 = torch.aten.size.int %3899, %int1_4584 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_4585 = torch.constant.int 4 %int8_4586 = torch.constant.int 8 %int4_4587 = torch.constant.int 4 %int128_4588 = torch.constant.int 128 %4013 = torch.prim.ListConstruct %int4_4585, %4012, %int8_4586, %int4_4587, %int128_4588 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_4589 = torch.constant.bool false %4014 = torch.aten.expand %4011, %4013, %false_4589 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4014, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_4590 = torch.constant.int 0 
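// The index_put ops above scatter the rotated keys and then the values (slot offsets
// 36 and 37, one apart) into the flat cache %3807, viewed as [pages, 32, 2, 32, 8, 128].
// This layout is consistent with a paged KV cache of 32 layers x {K,V} x 32 positions
// x 8 KV heads x 128 head dim per page, with this block updating layer 18 (36 = 2*18).
// The unsqueeze/expand/clone/view chain that follows broadcasts the 8 KV heads 4x to
// 32 so K and V match the 32 query heads (grouped-query attention) ahead of the
// _scaled_dot_product_flash_attention_for_cpu call below.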
%4015 = torch.aten.clone %4014, %int0_4590 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4015, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_4591 = torch.constant.int 4 %int32_4592 = torch.constant.int 32 %int128_4593 = torch.constant.int 128 %4016 = torch.prim.ListConstruct %int4_4591, %4012, %int32_4592, %int128_4593 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4017 = torch.aten._unsafe_view %4015, %4016 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4017, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_4594 = torch.constant.int 1 %int2_4595 = torch.constant.int 2 %4018 = torch.aten.transpose.int %3934, %int1_4594, %int2_4595 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4018, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4596 = torch.constant.int 1 %int2_4597 = torch.constant.int 2 %4019 = torch.aten.transpose.int %4010, %int1_4596, %int2_4597 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4019, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4598 = torch.constant.int 1 %int2_4599 = torch.constant.int 2 %4020 = torch.aten.transpose.int %4017, %int1_4598, %int2_4599 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4020, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_4600 = torch.constant.float 0.000000e+00 %false_4601 = torch.constant.bool false %none_4602 = torch.constant.none %4021:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%4018, %4019, %4020, %float0.000000e00_4600, %false_4601, %320, %none_4602) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %4021#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4603 = torch.constant.int 1 %int2_4604 = torch.constant.int 2 %4022 = torch.aten.transpose.int %4021#0, %int1_4603, %int2_4604 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4022, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_4605 = torch.constant.int 4 %int4096_4606 = torch.constant.int 4096 %4023 = torch.prim.ListConstruct %int4_4605, %3920, %int4096_4606 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4024 = torch.aten.view %4022, %4023 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4024, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4607 = torch.constant.int -2 %int-1_4608 = torch.constant.int -1 %4025 = torch.aten.transpose.int %167, %int-2_4607, %int-1_4608 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_4609 = torch.constant.int 4 %4026 = torch.aten.mul.int %int4_4609, %3920 : 
!torch.int, !torch.int -> !torch.int %int4096_4610 = torch.constant.int 4096 %4027 = torch.prim.ListConstruct %4026, %int4096_4610 : (!torch.int, !torch.int) -> !torch.list<int> %4028 = torch.aten.view %4024, %4027 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4028, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4029 = torch.aten.mm %4028, %4025 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4029, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4611 = torch.constant.int 4 %int4096_4612 = torch.constant.int 4096 %4030 = torch.prim.ListConstruct %int4_4611, %3920, %int4096_4612 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4031 = torch.aten.view %4029, %4030 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4031, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_4613 = torch.constant.int 1 %4032 = torch.aten.add.Tensor %3869, %4031, %int1_4613 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4032, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_4614 = torch.constant.int 6 %4033 = torch.prims.convert_element_type %4032, %int6_4614 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4033, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_4615 = torch.constant.int 2 %4034 = torch.aten.pow.Tensor_Scalar %4033, %int2_4615 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4034, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_4616 = torch.constant.int -1 %4035 = torch.prim.ListConstruct %int-1_4616 : (!torch.int) -> !torch.list<int> %true_4617 = torch.constant.bool true %none_4618 = torch.constant.none %4036 = torch.aten.mean.dim %4034, %4035, %true_4617, %none_4618 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4036, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_4619 = torch.constant.float 9.9999997473787516E-6 %int1_4620 = torch.constant.int 1 %4037 = torch.aten.add.Scalar %4036, %float9.999990e-06_4619, %int1_4620 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4037, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4038 = torch.aten.rsqrt %4037 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4038, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4039 = torch.aten.mul.Tensor %4033, %4038 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4039, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4040 = torch.aten.mul.Tensor %168, %4039 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4040, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_4621 = torch.constant.int 5 
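// Attention epilogue: %4029/%4031 is the attn_output projection (transposed %167) and
// %4032 adds it back onto the residual stream. The f32 ops above form an RMSNorm
// (mean of squares, + ~1e-5 eps, rsqrt, scale by ffn_norm weight %168); %4041 below
// casts back to f16 for the SwiGLU MLP: silu(x @ ffn_gate^T) * (x @ ffn_up^T), then
// the ffn_down projection, using %169, %170, and %171 respectively.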
%4041 = torch.prims.convert_element_type %4040, %int5_4621 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4041, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4622 = torch.constant.int -2 %int-1_4623 = torch.constant.int -1 %4042 = torch.aten.transpose.int %169, %int-2_4622, %int-1_4623 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_4624 = torch.constant.int 4 %4043 = torch.aten.mul.int %int4_4624, %294 : !torch.int, !torch.int -> !torch.int %int4096_4625 = torch.constant.int 4096 %4044 = torch.prim.ListConstruct %4043, %int4096_4625 : (!torch.int, !torch.int) -> !torch.list<int> %4045 = torch.aten.view %4041, %4044 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4045, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4046 = torch.aten.mm %4045, %4042 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4046, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_4626 = torch.constant.int 4 %int14336_4627 = torch.constant.int 14336 %4047 = torch.prim.ListConstruct %int4_4626, %294, %int14336_4627 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4048 = torch.aten.view %4046, %4047 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4048, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4049 = torch.aten.silu %4048 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4049, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_4628 = torch.constant.int -2 %int-1_4629 = torch.constant.int -1 %4050 = torch.aten.transpose.int %170, %int-2_4628, %int-1_4629 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_4630 = torch.constant.int 4 %4051 = torch.aten.mul.int %int4_4630, %294 : !torch.int, !torch.int -> !torch.int %int4096_4631 = torch.constant.int 4096 %4052 = torch.prim.ListConstruct %4051, %int4096_4631 : (!torch.int, !torch.int) -> !torch.list<int> %4053 = torch.aten.view %4041, %4052 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4053, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4054 = torch.aten.mm %4053, %4050 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4054, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_4632 = torch.constant.int 4 %int14336_4633 = torch.constant.int 14336 %4055 = torch.prim.ListConstruct %int4_4632, %294, %int14336_4633 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4056 = torch.aten.view %4054, %4055 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4056, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4057 = torch.aten.mul.Tensor %4049, %4056 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4057, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_4634 
= torch.constant.int -2 %int-1_4635 = torch.constant.int -1 %4058 = torch.aten.transpose.int %171, %int-2_4634, %int-1_4635 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_4636 = torch.constant.int 1 %4059 = torch.aten.size.int %4048, %int1_4636 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_4637 = torch.constant.int 4 %4060 = torch.aten.mul.int %int4_4637, %4059 : !torch.int, !torch.int -> !torch.int %int14336_4638 = torch.constant.int 14336 %4061 = torch.prim.ListConstruct %4060, %int14336_4638 : (!torch.int, !torch.int) -> !torch.list<int> %4062 = torch.aten.view %4057, %4061 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4062, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %4063 = torch.aten.mm %4062, %4058 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4063, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4639 = torch.constant.int 4 %int4096_4640 = torch.constant.int 4096 %4064 = torch.prim.ListConstruct %int4_4639, %4059, %int4096_4640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4065 = torch.aten.view %4063, %4064 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4065, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_4641 = torch.constant.int 1 %4066 = torch.aten.add.Tensor %4032, %4065, %int1_4641 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4066, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_4642 = torch.constant.int 6 %4067 = torch.prims.convert_element_type %4066, %int6_4642 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4067, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_4643 = torch.constant.int 2 %4068 = torch.aten.pow.Tensor_Scalar %4067, %int2_4643 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4068, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_4644 = torch.constant.int -1 %4069 = torch.prim.ListConstruct %int-1_4644 : (!torch.int) -> !torch.list<int> %true_4645 = torch.constant.bool true %none_4646 = torch.constant.none %4070 = torch.aten.mean.dim %4068, %4069, %true_4645, %none_4646 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4070, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_4647 = torch.constant.float 9.9999997473787516E-6 %int1_4648 = torch.constant.int 1 %4071 = torch.aten.add.Scalar %4070, %float9.999990e-06_4647, %int1_4648 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4071, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4072 = torch.aten.rsqrt %4071 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4072, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4073 = torch.aten.mul.Tensor %4067, %4072 : !torch.vtensor<[4,?,4096],f32>, 
!torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4073, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4074 = torch.aten.mul.Tensor %172, %4073 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4074, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_4649 = torch.constant.int 5 %4075 = torch.prims.convert_element_type %4074, %int5_4649 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4075, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4650 = torch.constant.int -2 %int-1_4651 = torch.constant.int -1 %4076 = torch.aten.transpose.int %173, %int-2_4650, %int-1_4651 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_4652 = torch.constant.int 4 %4077 = torch.aten.mul.int %int4_4652, %294 : !torch.int, !torch.int -> !torch.int %int4096_4653 = torch.constant.int 4096 %4078 = torch.prim.ListConstruct %4077, %int4096_4653 : (!torch.int, !torch.int) -> !torch.list<int> %4079 = torch.aten.view %4075, %4078 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4079, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4080 = torch.aten.mm %4079, %4076 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4080, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4654 = torch.constant.int 4 %int4096_4655 = torch.constant.int 4096 %4081 = torch.prim.ListConstruct %int4_4654, %294, %int4096_4655 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4082 = torch.aten.view %4080, %4081 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4082, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4656 = torch.constant.int -2 %int-1_4657 = torch.constant.int -1 %4083 = torch.aten.transpose.int %174, %int-2_4656, %int-1_4657 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_4658 = torch.constant.int 4 %4084 = torch.aten.mul.int %int4_4658, %294 : !torch.int, !torch.int -> !torch.int %int4096_4659 = torch.constant.int 4096 %4085 = torch.prim.ListConstruct %4084, %int4096_4659 : (!torch.int, !torch.int) -> !torch.list<int> %4086 = torch.aten.view %4075, %4085 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4086, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4087 = torch.aten.mm %4086, %4083 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4087, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_4660 = torch.constant.int 4 %int1024_4661 = torch.constant.int 1024 %4088 = torch.prim.ListConstruct %int4_4660, %294, %int1024_4661 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4089 = torch.aten.view %4087, %4088 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4089, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_4662 = torch.constant.int -2 %int-1_4663 = 
torch.constant.int -1 %4090 = torch.aten.transpose.int %175, %int-2_4662, %int-1_4663 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_4664 = torch.constant.int 4 %4091 = torch.aten.mul.int %int4_4664, %294 : !torch.int, !torch.int -> !torch.int %int4096_4665 = torch.constant.int 4096 %4092 = torch.prim.ListConstruct %4091, %int4096_4665 : (!torch.int, !torch.int) -> !torch.list<int> %4093 = torch.aten.view %4075, %4092 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4093, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4094 = torch.aten.mm %4093, %4090 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4094, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_4666 = torch.constant.int 4 %int1024_4667 = torch.constant.int 1024 %4095 = torch.prim.ListConstruct %int4_4666, %294, %int1024_4667 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4096 = torch.aten.view %4094, %4095 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4096, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_4668 = torch.constant.int 4 %int32_4669 = torch.constant.int 32 %int128_4670 = torch.constant.int 128 %4097 = torch.prim.ListConstruct %int4_4668, %294, %int32_4669, %int128_4670 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4098 = torch.aten.view %4082, %4097 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4098, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_4671 = torch.constant.int 4 %int8_4672 = torch.constant.int 8 %int128_4673 = torch.constant.int 128 %4099 = torch.prim.ListConstruct %int4_4671, %294, %int8_4672, %int128_4673 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4100 = torch.aten.view %4089, %4099 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4100, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_4674 = torch.constant.int 4 %int8_4675 = torch.constant.int 8 %int128_4676 = torch.constant.int 128 %4101 = torch.prim.ListConstruct %int4_4674, %294, %int8_4675, %int128_4676 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4102 = torch.aten.view %4096, %4101 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4102, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_4677 = torch.constant.int 131072 %none_4678 = torch.constant.none %none_4679 = torch.constant.none %cpu_4680 = torch.constant.device "cpu" %false_4681 = torch.constant.bool false %4103 = torch.aten.arange %int131072_4677, %none_4678, %none_4679, %cpu_4680, %false_4681 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_4682 = torch.constant.int 0 %int128_4683 = torch.constant.int 128 %int2_4684 = torch.constant.int 2 %none_4685 = torch.constant.none %none_4686 = torch.constant.none %cpu_4687 = torch.constant.device "cpu" %false_4688 = torch.constant.bool false %4104 = torch.aten.arange.start_step %int0_4682, %int128_4683, %int2_4684, %none_4685, 
%none_4686, %cpu_4687, %false_4688 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_4689 = torch.constant.int 0 %int0_4690 = torch.constant.int 0 %int64_4691 = torch.constant.int 64 %int1_4692 = torch.constant.int 1 %4105 = torch.aten.slice.Tensor %4104, %int0_4689, %int0_4690, %int64_4691, %int1_4692 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_4693 = torch.constant.int 6 %4106 = torch.prims.convert_element_type %4105, %int6_4693 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_4694 = torch.constant.int 128 %4107 = torch.aten.div.Scalar %4106, %int128_4694 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_4695 = torch.constant.float 5.000000e+05 %4108 = torch.aten.pow.Scalar %float5.000000e05_4695, %4107 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4109 = torch.aten.reciprocal %4108 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_4696 = torch.constant.float 1.000000e+00 %4110 = torch.aten.mul.Scalar %4109, %float1.000000e00_4696 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_4697 = torch.constant.int 131072 %int1_4698 = torch.constant.int 1 %4111 = torch.prim.ListConstruct %int131072_4697, %int1_4698 : (!torch.int, !torch.int) -> !torch.list<int> %4112 = torch.aten.view %4103, %4111 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4113 = torch.aten.mul.Tensor %4112, %4110 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4114 = torch.aten.cos %4113 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4115 = torch.aten.sin %4113 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4116 = torch.aten.complex %4114, %4115 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_4699 = torch.constant.int 1 %4117 = torch.aten.size.int %4082, %int1_4699 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_4700 = torch.constant.int 0 %4118 = torch.aten.add.int %int0_4700, %4117 : !torch.int, !torch.int -> !torch.int %int0_4701 = torch.constant.int 0 %int0_4702 = torch.constant.int 0 %int1_4703 = torch.constant.int 1 %4119 = torch.aten.slice.Tensor %4116, %int0_4701, %int0_4702, %4118, %int1_4703 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4119, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_4704 = torch.constant.int 1 %int0_4705 = torch.constant.int 0 %int9223372036854775807_4706 = torch.constant.int 9223372036854775807 %int1_4707 = torch.constant.int 1 %4120 = torch.aten.slice.Tensor %4119, %int1_4704, %int0_4705, %int9223372036854775807_4706, %int1_4707 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4120, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_4708 = torch.constant.int 0 %4121 = torch.aten.unsqueeze %4120, %int0_4708 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4121, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> 
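// The RoPE rotation table is rebuilt with the same constants for what appears to be
// the next transformer block (its cache writes use offsets 38/39 vs. 36/37 above),
// to rotate the fresh Q (%4098) and K (%4100) projections. The pair of unsqueezes
// (%4121 above, %4122 below) reshapes the sliced table from [seq, 64] to
// [1, seq, 1, 64] so the complex multiply broadcasts over batch and heads.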
%int2_4709 = torch.constant.int 2 %4122 = torch.aten.unsqueeze %4121, %int2_4709 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4122, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_4710 = torch.constant.int 3 %int0_4711 = torch.constant.int 0 %int9223372036854775807_4712 = torch.constant.int 9223372036854775807 %int1_4713 = torch.constant.int 1 %4123 = torch.aten.slice.Tensor %4122, %int3_4710, %int0_4711, %int9223372036854775807_4712, %int1_4713 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4123, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4124 = torch_c.to_builtin_tensor %4098 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_4714 = arith.constant 1 : index %dim_4715 = tensor.dim %4124, %c1_4714 : tensor<4x?x32x128xf16> %4125 = flow.tensor.bitcast %4124 : tensor<4x?x32x128xf16>{%dim_4715} -> tensor<4x?x32x64xcomplex<f16>>{%dim_4715} %4126 = torch_c.from_builtin_tensor %4125 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %4126, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %4127 = torch.aten.mul.Tensor %4126, %4123 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %4127, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %4128 = torch_c.to_builtin_tensor %4127 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_4716 = arith.constant 1 : index %dim_4717 = tensor.dim %4128, %c1_4716 : tensor<4x?x32x64xcomplex<f32>> %4129 = flow.tensor.bitcast %4128 : tensor<4x?x32x64xcomplex<f32>>{%dim_4717} -> tensor<4x?x32x128xf32>{%dim_4717} %4130 = torch_c.from_builtin_tensor %4129 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %4130, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_4718 = torch.constant.int 5 %4131 = torch.prims.convert_element_type %4130, %int5_4718 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4131, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_4719 = torch.constant.int 131072 %none_4720 = torch.constant.none %none_4721 = torch.constant.none %cpu_4722 = torch.constant.device "cpu" %false_4723 = torch.constant.bool false %4132 = torch.aten.arange %int131072_4719, %none_4720, %none_4721, %cpu_4722, %false_4723 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_4724 = torch.constant.int 0 %int128_4725 = torch.constant.int 128 %int2_4726 = torch.constant.int 2 %none_4727 = torch.constant.none %none_4728 = torch.constant.none %cpu_4729 = torch.constant.device "cpu" %false_4730 = torch.constant.bool false %4133 = torch.aten.arange.start_step %int0_4724, %int128_4725, %int2_4726, %none_4727, %none_4728, %cpu_4729, %false_4730 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_4731 = torch.constant.int 0 %int0_4732 = torch.constant.int 0 %int64_4733 = torch.constant.int 64 %int1_4734 = 
torch.constant.int 1 %4134 = torch.aten.slice.Tensor %4133, %int0_4731, %int0_4732, %int64_4733, %int1_4734 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_4735 = torch.constant.int 6 %4135 = torch.prims.convert_element_type %4134, %int6_4735 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_4736 = torch.constant.int 128 %4136 = torch.aten.div.Scalar %4135, %int128_4736 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_4737 = torch.constant.float 5.000000e+05 %4137 = torch.aten.pow.Scalar %float5.000000e05_4737, %4136 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4138 = torch.aten.reciprocal %4137 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_4738 = torch.constant.float 1.000000e+00 %4139 = torch.aten.mul.Scalar %4138, %float1.000000e00_4738 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_4739 = torch.constant.int 131072 %int1_4740 = torch.constant.int 1 %4140 = torch.prim.ListConstruct %int131072_4739, %int1_4740 : (!torch.int, !torch.int) -> !torch.list<int> %4141 = torch.aten.view %4132, %4140 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4142 = torch.aten.mul.Tensor %4141, %4139 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4143 = torch.aten.cos %4142 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4144 = torch.aten.sin %4142 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4145 = torch.aten.complex %4143, %4144 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_4741 = torch.constant.int 1 %4146 = torch.aten.size.int %4089, %int1_4741 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_4742 = torch.constant.int 0 %4147 = torch.aten.add.int %int0_4742, %4146 : !torch.int, !torch.int -> !torch.int %int0_4743 = torch.constant.int 0 %int0_4744 = torch.constant.int 0 %int1_4745 = torch.constant.int 1 %4148 = torch.aten.slice.Tensor %4145, %int0_4743, %int0_4744, %4147, %int1_4745 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4148, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_4746 = torch.constant.int 1 %int0_4747 = torch.constant.int 0 %int9223372036854775807_4748 = torch.constant.int 9223372036854775807 %int1_4749 = torch.constant.int 1 %4149 = torch.aten.slice.Tensor %4148, %int1_4746, %int0_4747, %int9223372036854775807_4748, %int1_4749 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4149, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_4750 = torch.constant.int 0 %4150 = torch.aten.unsqueeze %4149, %int0_4750 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4150, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_4751 = torch.constant.int 2 %4151 = torch.aten.unsqueeze %4150, %int2_4751 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4151, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : 
!torch.vtensor<[1,?,1,64],complex<f32>> %int3_4752 = torch.constant.int 3 %int0_4753 = torch.constant.int 0 %int9223372036854775807_4754 = torch.constant.int 9223372036854775807 %int1_4755 = torch.constant.int 1 %4152 = torch.aten.slice.Tensor %4151, %int3_4752, %int0_4753, %int9223372036854775807_4754, %int1_4755 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4152, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4153 = torch_c.to_builtin_tensor %4100 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_4756 = arith.constant 1 : index %dim_4757 = tensor.dim %4153, %c1_4756 : tensor<4x?x8x128xf16> %4154 = flow.tensor.bitcast %4153 : tensor<4x?x8x128xf16>{%dim_4757} -> tensor<4x?x8x64xcomplex<f16>>{%dim_4757} %4155 = torch_c.from_builtin_tensor %4154 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %4155, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %4156 = torch.aten.mul.Tensor %4155, %4152 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %4156, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %4157 = torch_c.to_builtin_tensor %4156 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_4758 = arith.constant 1 : index %dim_4759 = tensor.dim %4157, %c1_4758 : tensor<4x?x8x64xcomplex<f32>> %4158 = flow.tensor.bitcast %4157 : tensor<4x?x8x64xcomplex<f32>>{%dim_4759} -> tensor<4x?x8x128xf32>{%dim_4759} %4159 = torch_c.from_builtin_tensor %4158 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %4159, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_4760 = torch.constant.int 5 %4160 = torch.prims.convert_element_type %4159, %int5_4760 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4160, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_4761 = torch.constant.int 64 %4161 = torch.aten.mul.Scalar %arg2, %int64_4761 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4161, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int38 = torch.constant.int 38 %int1_4762 = torch.constant.int 1 %4162 = torch.aten.add.Scalar %4161, %int38, %int1_4762 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4162, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_4763 = torch.constant.int 4 %int32_4764 = torch.constant.int 32 %int8_4765 = torch.constant.int 8 %int128_4766 = torch.constant.int 128 %4163 = torch.prim.ListConstruct %int4_4763, %425, %int32_4764, %int8_4765, %int128_4766 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4164 = torch.aten.view %4160, %4163 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4164, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_4767 = torch.constant.int 4 %4165 = torch.aten.mul.int %int4_4767, %425 : !torch.int, !torch.int -> !torch.int %int32_4768 = torch.constant.int 32 %int8_4769 = 
torch.constant.int 8 %int128_4770 = torch.constant.int 128 %4166 = torch.prim.ListConstruct %4165, %int32_4768, %int8_4769, %int128_4770 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4167 = torch.aten.view %4164, %4166 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4167, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_4771 = torch.constant.int 4 %4168 = torch.aten.mul.int %int4_4771, %425 : !torch.int, !torch.int -> !torch.int %4169 = torch.prim.ListConstruct %4168 : (!torch.int) -> !torch.list<int> %4170 = torch.aten.view %4162, %4169 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4170, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_4772 = torch.constant.int 32 %int2_4773 = torch.constant.int 2 %int32_4774 = torch.constant.int 32 %int8_4775 = torch.constant.int 8 %int128_4776 = torch.constant.int 128 %4171 = torch.prim.ListConstruct %416, %int32_4772, %int2_4773, %int32_4774, %int8_4775, %int128_4776 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4172 = torch.aten.view %4004, %4171 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4172, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_4777 = torch.constant.int 32 %4173 = torch.aten.mul.int %416, %int32_4777 : !torch.int, !torch.int -> !torch.int %int2_4778 = torch.constant.int 2 %4174 = torch.aten.mul.int %4173, %int2_4778 : !torch.int, !torch.int -> !torch.int %int32_4779 = torch.constant.int 32 %int8_4780 = torch.constant.int 8 %int128_4781 = torch.constant.int 128 %4175 = torch.prim.ListConstruct %4174, %int32_4779, %int8_4780, %int128_4781 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4176 = torch.aten.view %4172, %4175 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4176, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %4177 = torch.prim.ListConstruct %4170 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_4782 = torch.constant.bool false %4178 = torch.aten.index_put %4176, %4177, %4167, %false_4782 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4178, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_4783 = torch.constant.int 32 %int2_4784 = torch.constant.int 2 %int32_4785 = torch.constant.int 32 %int8_4786 = torch.constant.int 8 %int128_4787 = torch.constant.int 128 %4179 = torch.prim.ListConstruct %416, %int32_4783, %int2_4784, %int32_4785, %int8_4786, %int128_4787 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4180 = torch.aten.view %4178, %4179 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4180, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_4788 = torch.constant.int 2097152 %4181 = torch.prim.ListConstruct %416, %int2097152_4788 : (!torch.int, !torch.int) -> !torch.list<int> %4182 = torch.aten.view %4180, %4181 : 
!torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4182, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_4789 = torch.constant.int 32 %int2_4790 = torch.constant.int 2 %int32_4791 = torch.constant.int 32 %int8_4792 = torch.constant.int 8 %int128_4793 = torch.constant.int 128 %4183 = torch.prim.ListConstruct %416, %int32_4789, %int2_4790, %int32_4791, %int8_4792, %int128_4793 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4184 = torch.aten.view %4182, %4183 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4184, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_4794 = torch.constant.int 32 %int8_4795 = torch.constant.int 8 %int128_4796 = torch.constant.int 128 %4185 = torch.prim.ListConstruct %4174, %int32_4794, %int8_4795, %int128_4796 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4186 = torch.aten.view %4184, %4185 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4186, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_4797 = torch.constant.int 4 %int32_4798 = torch.constant.int 32 %int8_4799 = torch.constant.int 8 %int128_4800 = torch.constant.int 128 %4187 = torch.prim.ListConstruct %int4_4797, %425, %int32_4798, %int8_4799, %int128_4800 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4188 = torch.aten.view %4102, %4187 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4188, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_4801 = torch.constant.int 4 %4189 = torch.aten.mul.int %int4_4801, %425 : !torch.int, !torch.int -> !torch.int %int32_4802 = torch.constant.int 32 %int8_4803 = torch.constant.int 8 %int128_4804 = torch.constant.int 128 %4190 = torch.prim.ListConstruct %4189, %int32_4802, %int8_4803, %int128_4804 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4191 = torch.aten.view %4188, %4190 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4191, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_4805 = torch.constant.int 1 %int1_4806 = torch.constant.int 1 %4192 = torch.aten.add.Scalar %4162, %int1_4805, %int1_4806 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4192, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_4807 = torch.constant.int 4 %4193 = torch.aten.mul.int %int4_4807, %425 : !torch.int, !torch.int -> !torch.int %4194 = torch.prim.ListConstruct %4193 : (!torch.int) -> !torch.list<int> %4195 = torch.aten.view %4192, %4194 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4195, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %4196 = torch.prim.ListConstruct %4195 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_4808 = torch.constant.bool false %4197 = torch.aten.index_put %4186, %4196, %4191, %false_4808 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool 
-> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4197, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_4809 = torch.constant.int 32 %int2_4810 = torch.constant.int 2 %int32_4811 = torch.constant.int 32 %int8_4812 = torch.constant.int 8 %int128_4813 = torch.constant.int 128 %4198 = torch.prim.ListConstruct %416, %int32_4809, %int2_4810, %int32_4811, %int8_4812, %int128_4813 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4199 = torch.aten.view %4197, %4198 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4199, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_4814 = torch.constant.int 2097152 %4200 = torch.prim.ListConstruct %416, %int2097152_4814 : (!torch.int, !torch.int) -> !torch.list<int> %4201 = torch.aten.view %4199, %4200 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4201, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_4815 = torch.constant.int -2 %4202 = torch.aten.unsqueeze %4160, %int-2_4815 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4202, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_4816 = torch.constant.int 4 %int8_4817 = torch.constant.int 8 %int4_4818 = torch.constant.int 4 %int128_4819 = torch.constant.int 128 %4203 = torch.prim.ListConstruct %int4_4816, %4146, %int8_4817, %int4_4818, %int128_4819 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_4820 = torch.constant.bool false %4204 = torch.aten.expand %4202, %4203, %false_4820 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4204, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_4821 = torch.constant.int 0 %4205 = torch.aten.clone %4204, %int0_4821 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4205, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_4822 = torch.constant.int 4 %int32_4823 = torch.constant.int 32 %int128_4824 = torch.constant.int 128 %4206 = torch.prim.ListConstruct %int4_4822, %4146, %int32_4823, %int128_4824 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4207 = torch.aten._unsafe_view %4205, %4206 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4207, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_4825 = torch.constant.int -2 %4208 = torch.aten.unsqueeze %4102, %int-2_4825 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4208, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_4826 = torch.constant.int 1 %4209 = torch.aten.size.int %4096, %int1_4826 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_4827 = torch.constant.int 4 %int8_4828 = torch.constant.int 8 %int4_4829 = torch.constant.int 4 %int128_4830 = torch.constant.int 128 %4210 = torch.prim.ListConstruct %int4_4827, %4209, %int8_4828, %int4_4829, %int128_4830 
: (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_4831 = torch.constant.bool false %4211 = torch.aten.expand %4208, %4210, %false_4831 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4211, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_4832 = torch.constant.int 0 %4212 = torch.aten.clone %4211, %int0_4832 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4212, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_4833 = torch.constant.int 4 %int32_4834 = torch.constant.int 32 %int128_4835 = torch.constant.int 128 %4213 = torch.prim.ListConstruct %int4_4833, %4209, %int32_4834, %int128_4835 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4214 = torch.aten._unsafe_view %4212, %4213 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4214, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_4836 = torch.constant.int 1 %int2_4837 = torch.constant.int 2 %4215 = torch.aten.transpose.int %4131, %int1_4836, %int2_4837 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4215, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4838 = torch.constant.int 1 %int2_4839 = torch.constant.int 2 %4216 = torch.aten.transpose.int %4207, %int1_4838, %int2_4839 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4216, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4840 = torch.constant.int 1 %int2_4841 = torch.constant.int 2 %4217 = torch.aten.transpose.int %4214, %int1_4840, %int2_4841 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4217, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_4842 = torch.constant.float 0.000000e+00 %false_4843 = torch.constant.bool false %none_4844 = torch.constant.none %4218:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%4215, %4216, %4217, %float0.000000e00_4842, %false_4843, %320, %none_4844) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %4218#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_4845 = torch.constant.int 1 %int2_4846 = torch.constant.int 2 %4219 = torch.aten.transpose.int %4218#0, %int1_4845, %int2_4846 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4219, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_4847 = torch.constant.int 4 %int4096_4848 = torch.constant.int 4096 %4220 = torch.prim.ListConstruct %int4_4847, %4117, %int4096_4848 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4221 = torch.aten.view %4219, %4220 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> 
!torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4221, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4849 = torch.constant.int -2 %int-1_4850 = torch.constant.int -1 %4222 = torch.aten.transpose.int %176, %int-2_4849, %int-1_4850 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_4851 = torch.constant.int 4 %4223 = torch.aten.mul.int %int4_4851, %4117 : !torch.int, !torch.int -> !torch.int %int4096_4852 = torch.constant.int 4096 %4224 = torch.prim.ListConstruct %4223, %int4096_4852 : (!torch.int, !torch.int) -> !torch.list<int> %4225 = torch.aten.view %4221, %4224 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4225, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4226 = torch.aten.mm %4225, %4222 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4226, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4853 = torch.constant.int 4 %int4096_4854 = torch.constant.int 4096 %4227 = torch.prim.ListConstruct %int4_4853, %4117, %int4096_4854 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4228 = torch.aten.view %4226, %4227 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4228, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_4855 = torch.constant.int 1 %4229 = torch.aten.add.Tensor %4066, %4228, %int1_4855 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4229, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_4856 = torch.constant.int 6 %4230 = torch.prims.convert_element_type %4229, %int6_4856 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4230, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_4857 = torch.constant.int 2 %4231 = torch.aten.pow.Tensor_Scalar %4230, %int2_4857 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4231, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_4858 = torch.constant.int -1 %4232 = torch.prim.ListConstruct %int-1_4858 : (!torch.int) -> !torch.list<int> %true_4859 = torch.constant.bool true %none_4860 = torch.constant.none %4233 = torch.aten.mean.dim %4231, %4232, %true_4859, %none_4860 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4233, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_4861 = torch.constant.float 9.9999997473787516E-6 %int1_4862 = torch.constant.int 1 %4234 = torch.aten.add.Scalar %4233, %float9.999990e-06_4861, %int1_4862 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4234, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4235 = torch.aten.rsqrt %4234 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4235, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4236 = torch.aten.mul.Tensor %4230, %4235 : !torch.vtensor<[4,?,4096],f32>, 
!torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4236, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4237 = torch.aten.mul.Tensor %177, %4236 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4237, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_4863 = torch.constant.int 5 %4238 = torch.prims.convert_element_type %4237, %int5_4863 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4238, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4864 = torch.constant.int -2 %int-1_4865 = torch.constant.int -1 %4239 = torch.aten.transpose.int %178, %int-2_4864, %int-1_4865 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_4866 = torch.constant.int 4 %4240 = torch.aten.mul.int %int4_4866, %294 : !torch.int, !torch.int -> !torch.int %int4096_4867 = torch.constant.int 4096 %4241 = torch.prim.ListConstruct %4240, %int4096_4867 : (!torch.int, !torch.int) -> !torch.list<int> %4242 = torch.aten.view %4238, %4241 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4242, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4243 = torch.aten.mm %4242, %4239 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4243, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_4868 = torch.constant.int 4 %int14336_4869 = torch.constant.int 14336 %4244 = torch.prim.ListConstruct %int4_4868, %294, %int14336_4869 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4245 = torch.aten.view %4243, %4244 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4245, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4246 = torch.aten.silu %4245 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4246, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_4870 = torch.constant.int -2 %int-1_4871 = torch.constant.int -1 %4247 = torch.aten.transpose.int %179, %int-2_4870, %int-1_4871 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_4872 = torch.constant.int 4 %4248 = torch.aten.mul.int %int4_4872, %294 : !torch.int, !torch.int -> !torch.int %int4096_4873 = torch.constant.int 4096 %4249 = torch.prim.ListConstruct %4248, %int4096_4873 : (!torch.int, !torch.int) -> !torch.list<int> %4250 = torch.aten.view %4238, %4249 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4250, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4251 = torch.aten.mm %4250, %4247 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4251, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_4874 = torch.constant.int 4 %int14336_4875 = torch.constant.int 14336 %4252 = torch.prim.ListConstruct %int4_4874, %294, %int14336_4875 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4253 = torch.aten.view %4251, %4252 : 
!torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4253, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4254 = torch.aten.mul.Tensor %4246, %4253 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4254, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_4876 = torch.constant.int -2 %int-1_4877 = torch.constant.int -1 %4255 = torch.aten.transpose.int %180, %int-2_4876, %int-1_4877 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_4878 = torch.constant.int 1 %4256 = torch.aten.size.int %4245, %int1_4878 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_4879 = torch.constant.int 4 %4257 = torch.aten.mul.int %int4_4879, %4256 : !torch.int, !torch.int -> !torch.int %int14336_4880 = torch.constant.int 14336 %4258 = torch.prim.ListConstruct %4257, %int14336_4880 : (!torch.int, !torch.int) -> !torch.list<int> %4259 = torch.aten.view %4254, %4258 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4259, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %4260 = torch.aten.mm %4259, %4255 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4260, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4881 = torch.constant.int 4 %int4096_4882 = torch.constant.int 4096 %4261 = torch.prim.ListConstruct %int4_4881, %4256, %int4096_4882 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4262 = torch.aten.view %4260, %4261 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4262, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_4883 = torch.constant.int 1 %4263 = torch.aten.add.Tensor %4229, %4262, %int1_4883 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4263, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_4884 = torch.constant.int 6 %4264 = torch.prims.convert_element_type %4263, %int6_4884 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4264, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_4885 = torch.constant.int 2 %4265 = torch.aten.pow.Tensor_Scalar %4264, %int2_4885 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4265, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_4886 = torch.constant.int -1 %4266 = torch.prim.ListConstruct %int-1_4886 : (!torch.int) -> !torch.list<int> %true_4887 = torch.constant.bool true %none_4888 = torch.constant.none %4267 = torch.aten.mean.dim %4265, %4266, %true_4887, %none_4888 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4267, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_4889 = torch.constant.float 9.9999997473787516E-6 %int1_4890 = torch.constant.int 1 %4268 = torch.aten.add.Scalar %4267, %float9.999990e-06_4889, %int1_4890 : 
!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4268, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4269 = torch.aten.rsqrt %4268 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4269, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4270 = torch.aten.mul.Tensor %4264, %4269 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4270, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4271 = torch.aten.mul.Tensor %181, %4270 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4271, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_4891 = torch.constant.int 5 %4272 = torch.prims.convert_element_type %4271, %int5_4891 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4272, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4892 = torch.constant.int -2 %int-1_4893 = torch.constant.int -1 %4273 = torch.aten.transpose.int %182, %int-2_4892, %int-1_4893 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_4894 = torch.constant.int 4 %4274 = torch.aten.mul.int %int4_4894, %294 : !torch.int, !torch.int -> !torch.int %int4096_4895 = torch.constant.int 4096 %4275 = torch.prim.ListConstruct %4274, %int4096_4895 : (!torch.int, !torch.int) -> !torch.list<int> %4276 = torch.aten.view %4272, %4275 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4276, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4277 = torch.aten.mm %4276, %4273 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4277, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_4896 = torch.constant.int 4 %int4096_4897 = torch.constant.int 4096 %4278 = torch.prim.ListConstruct %int4_4896, %294, %int4096_4897 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4279 = torch.aten.view %4277, %4278 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4279, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_4898 = torch.constant.int -2 %int-1_4899 = torch.constant.int -1 %4280 = torch.aten.transpose.int %183, %int-2_4898, %int-1_4899 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_4900 = torch.constant.int 4 %4281 = torch.aten.mul.int %int4_4900, %294 : !torch.int, !torch.int -> !torch.int %int4096_4901 = torch.constant.int 4096 %4282 = torch.prim.ListConstruct %4281, %int4096_4901 : (!torch.int, !torch.int) -> !torch.list<int> %4283 = torch.aten.view %4272, %4282 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4283, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4284 = torch.aten.mm %4283, %4280 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4284, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_4902 = 
torch.constant.int 4 %int1024_4903 = torch.constant.int 1024 %4285 = torch.prim.ListConstruct %int4_4902, %294, %int1024_4903 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4286 = torch.aten.view %4284, %4285 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4286, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_4904 = torch.constant.int -2 %int-1_4905 = torch.constant.int -1 %4287 = torch.aten.transpose.int %184, %int-2_4904, %int-1_4905 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_4906 = torch.constant.int 4 %4288 = torch.aten.mul.int %int4_4906, %294 : !torch.int, !torch.int -> !torch.int %int4096_4907 = torch.constant.int 4096 %4289 = torch.prim.ListConstruct %4288, %int4096_4907 : (!torch.int, !torch.int) -> !torch.list<int> %4290 = torch.aten.view %4272, %4289 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4290, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4291 = torch.aten.mm %4290, %4287 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4291, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_4908 = torch.constant.int 4 %int1024_4909 = torch.constant.int 1024 %4292 = torch.prim.ListConstruct %int4_4908, %294, %int1024_4909 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4293 = torch.aten.view %4291, %4292 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4293, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_4910 = torch.constant.int 4 %int32_4911 = torch.constant.int 32 %int128_4912 = torch.constant.int 128 %4294 = torch.prim.ListConstruct %int4_4910, %294, %int32_4911, %int128_4912 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4295 = torch.aten.view %4279, %4294 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4295, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_4913 = torch.constant.int 4 %int8_4914 = torch.constant.int 8 %int128_4915 = torch.constant.int 128 %4296 = torch.prim.ListConstruct %int4_4913, %294, %int8_4914, %int128_4915 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4297 = torch.aten.view %4286, %4296 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4297, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_4916 = torch.constant.int 4 %int8_4917 = torch.constant.int 8 %int128_4918 = torch.constant.int 128 %4298 = torch.prim.ListConstruct %int4_4916, %294, %int8_4917, %int128_4918 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4299 = torch.aten.view %4293, %4298 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4299, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_4919 = torch.constant.int 131072 %none_4920 = torch.constant.none %none_4921 = torch.constant.none %cpu_4922 = torch.constant.device "cpu" %false_4923 = torch.constant.bool false %4300 = torch.aten.arange %int131072_4919, %none_4920, 
%none_4921, %cpu_4922, %false_4923 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_4924 = torch.constant.int 0 %int128_4925 = torch.constant.int 128 %int2_4926 = torch.constant.int 2 %none_4927 = torch.constant.none %none_4928 = torch.constant.none %cpu_4929 = torch.constant.device "cpu" %false_4930 = torch.constant.bool false %4301 = torch.aten.arange.start_step %int0_4924, %int128_4925, %int2_4926, %none_4927, %none_4928, %cpu_4929, %false_4930 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_4931 = torch.constant.int 0 %int0_4932 = torch.constant.int 0 %int64_4933 = torch.constant.int 64 %int1_4934 = torch.constant.int 1 %4302 = torch.aten.slice.Tensor %4301, %int0_4931, %int0_4932, %int64_4933, %int1_4934 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_4935 = torch.constant.int 6 %4303 = torch.prims.convert_element_type %4302, %int6_4935 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_4936 = torch.constant.int 128 %4304 = torch.aten.div.Scalar %4303, %int128_4936 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_4937 = torch.constant.float 5.000000e+05 %4305 = torch.aten.pow.Scalar %float5.000000e05_4937, %4304 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4306 = torch.aten.reciprocal %4305 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_4938 = torch.constant.float 1.000000e+00 %4307 = torch.aten.mul.Scalar %4306, %float1.000000e00_4938 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_4939 = torch.constant.int 131072 %int1_4940 = torch.constant.int 1 %4308 = torch.prim.ListConstruct %int131072_4939, %int1_4940 : (!torch.int, !torch.int) -> !torch.list<int> %4309 = torch.aten.view %4300, %4308 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4310 = torch.aten.mul.Tensor %4309, %4307 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4311 = torch.aten.cos %4310 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4312 = torch.aten.sin %4310 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4313 = torch.aten.complex %4311, %4312 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_4941 = torch.constant.int 1 %4314 = torch.aten.size.int %4279, %int1_4941 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_4942 = torch.constant.int 0 %4315 = torch.aten.add.int %int0_4942, %4314 : !torch.int, !torch.int -> !torch.int %int0_4943 = torch.constant.int 0 %int0_4944 = torch.constant.int 0 %int1_4945 = torch.constant.int 1 %4316 = torch.aten.slice.Tensor %4313, %int0_4943, %int0_4944, %4315, %int1_4945 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4316, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_4946 = torch.constant.int 1 %int0_4947 = torch.constant.int 0 %int9223372036854775807_4948 = torch.constant.int 9223372036854775807 %int1_4949 = torch.constant.int 1 %4317 = torch.aten.slice.Tensor %4316, %int1_4946, %int0_4947, %int9223372036854775807_4948, %int1_4949 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, 
!torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4317, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_4950 = torch.constant.int 0 %4318 = torch.aten.unsqueeze %4317, %int0_4950 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4318, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_4951 = torch.constant.int 2 %4319 = torch.aten.unsqueeze %4318, %int2_4951 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4319, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_4952 = torch.constant.int 3 %int0_4953 = torch.constant.int 0 %int9223372036854775807_4954 = torch.constant.int 9223372036854775807 %int1_4955 = torch.constant.int 1 %4320 = torch.aten.slice.Tensor %4319, %int3_4952, %int0_4953, %int9223372036854775807_4954, %int1_4955 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4320, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4321 = torch_c.to_builtin_tensor %4295 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_4956 = arith.constant 1 : index %dim_4957 = tensor.dim %4321, %c1_4956 : tensor<4x?x32x128xf16> %4322 = flow.tensor.bitcast %4321 : tensor<4x?x32x128xf16>{%dim_4957} -> tensor<4x?x32x64xcomplex<f16>>{%dim_4957} %4323 = torch_c.from_builtin_tensor %4322 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %4323, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %4324 = torch.aten.mul.Tensor %4323, %4320 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %4324, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %4325 = torch_c.to_builtin_tensor %4324 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_4958 = arith.constant 1 : index %dim_4959 = tensor.dim %4325, %c1_4958 : tensor<4x?x32x64xcomplex<f32>> %4326 = flow.tensor.bitcast %4325 : tensor<4x?x32x64xcomplex<f32>>{%dim_4959} -> tensor<4x?x32x128xf32>{%dim_4959} %4327 = torch_c.from_builtin_tensor %4326 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %4327, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_4960 = torch.constant.int 5 %4328 = torch.prims.convert_element_type %4327, %int5_4960 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4328, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_4961 = torch.constant.int 131072 %none_4962 = torch.constant.none %none_4963 = torch.constant.none %cpu_4964 = torch.constant.device "cpu" %false_4965 = torch.constant.bool false %4329 = torch.aten.arange %int131072_4961, %none_4962, %none_4963, %cpu_4964, %false_4965 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_4966 = torch.constant.int 0 %int128_4967 = torch.constant.int 128 %int2_4968 = torch.constant.int 2 %none_4969 = torch.constant.none 
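// ---------------------------------------------------------------------------
// Annotation (reader's note, not emitted by the compiler): the ops around this
// point rebuild the rotary-embedding (RoPE) table that was just applied to the
// query states, this time for the key states. The construction visible above
// and below follows the standard recipe: inv_freq[i] = 1 / 500000^(2i/128)
// for i in 0..63 (torch.aten.arange.start_step, div by 128, pow with base
// 5.0e5, reciprocal), an outer product with positions 0..131071, then cos/sin
// packed as complex<f32> via torch.aten.complex, so that applying the rotation
// reduces to a single complex multiply after the flow.tensor.bitcast of the
// f16 head vectors to complex<f16>.
// ---------------------------------------------------------------------------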
%none_4970 = torch.constant.none %cpu_4971 = torch.constant.device "cpu" %false_4972 = torch.constant.bool false %4330 = torch.aten.arange.start_step %int0_4966, %int128_4967, %int2_4968, %none_4969, %none_4970, %cpu_4971, %false_4972 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_4973 = torch.constant.int 0 %int0_4974 = torch.constant.int 0 %int64_4975 = torch.constant.int 64 %int1_4976 = torch.constant.int 1 %4331 = torch.aten.slice.Tensor %4330, %int0_4973, %int0_4974, %int64_4975, %int1_4976 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_4977 = torch.constant.int 6 %4332 = torch.prims.convert_element_type %4331, %int6_4977 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_4978 = torch.constant.int 128 %4333 = torch.aten.div.Scalar %4332, %int128_4978 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_4979 = torch.constant.float 5.000000e+05 %4334 = torch.aten.pow.Scalar %float5.000000e05_4979, %4333 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4335 = torch.aten.reciprocal %4334 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_4980 = torch.constant.float 1.000000e+00 %4336 = torch.aten.mul.Scalar %4335, %float1.000000e00_4980 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_4981 = torch.constant.int 131072 %int1_4982 = torch.constant.int 1 %4337 = torch.prim.ListConstruct %int131072_4981, %int1_4982 : (!torch.int, !torch.int) -> !torch.list<int> %4338 = torch.aten.view %4329, %4337 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4339 = torch.aten.mul.Tensor %4338, %4336 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4340 = torch.aten.cos %4339 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4341 = torch.aten.sin %4339 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4342 = torch.aten.complex %4340, %4341 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_4983 = torch.constant.int 1 %4343 = torch.aten.size.int %4286, %int1_4983 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_4984 = torch.constant.int 0 %4344 = torch.aten.add.int %int0_4984, %4343 : !torch.int, !torch.int -> !torch.int %int0_4985 = torch.constant.int 0 %int0_4986 = torch.constant.int 0 %int1_4987 = torch.constant.int 1 %4345 = torch.aten.slice.Tensor %4342, %int0_4985, %int0_4986, %4344, %int1_4987 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4345, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_4988 = torch.constant.int 1 %int0_4989 = torch.constant.int 0 %int9223372036854775807_4990 = torch.constant.int 9223372036854775807 %int1_4991 = torch.constant.int 1 %4346 = torch.aten.slice.Tensor %4345, %int1_4988, %int0_4989, %int9223372036854775807_4990, %int1_4991 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4346, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_4992 = torch.constant.int 0 %4347 = torch.aten.unsqueeze %4346, %int0_4992 : 
!torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4347, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_4993 = torch.constant.int 2 %4348 = torch.aten.unsqueeze %4347, %int2_4993 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4348, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_4994 = torch.constant.int 3 %int0_4995 = torch.constant.int 0 %int9223372036854775807_4996 = torch.constant.int 9223372036854775807 %int1_4997 = torch.constant.int 1 %4349 = torch.aten.slice.Tensor %4348, %int3_4994, %int0_4995, %int9223372036854775807_4996, %int1_4997 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4349, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4350 = torch_c.to_builtin_tensor %4297 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_4998 = arith.constant 1 : index %dim_4999 = tensor.dim %4350, %c1_4998 : tensor<4x?x8x128xf16> %4351 = flow.tensor.bitcast %4350 : tensor<4x?x8x128xf16>{%dim_4999} -> tensor<4x?x8x64xcomplex<f16>>{%dim_4999} %4352 = torch_c.from_builtin_tensor %4351 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %4352, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %4353 = torch.aten.mul.Tensor %4352, %4349 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %4353, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %4354 = torch_c.to_builtin_tensor %4353 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_5000 = arith.constant 1 : index %dim_5001 = tensor.dim %4354, %c1_5000 : tensor<4x?x8x64xcomplex<f32>> %4355 = flow.tensor.bitcast %4354 : tensor<4x?x8x64xcomplex<f32>>{%dim_5001} -> tensor<4x?x8x128xf32>{%dim_5001} %4356 = torch_c.from_builtin_tensor %4355 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %4356, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_5002 = torch.constant.int 5 %4357 = torch.prims.convert_element_type %4356, %int5_5002 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4357, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_5003 = torch.constant.int 64 %4358 = torch.aten.mul.Scalar %arg2, %int64_5003 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4358, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int40 = torch.constant.int 40 %int1_5004 = torch.constant.int 1 %4359 = torch.aten.add.Scalar %4358, %int40, %int1_5004 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4359, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_5005 = torch.constant.int 4 %int32_5006 = torch.constant.int 32 %int8_5007 = torch.constant.int 8 %int128_5008 = torch.constant.int 128 %4360 = torch.prim.ListConstruct %int4_5005, %425, %int32_5006, %int8_5007, %int128_5008 : (!torch.int, !torch.int, 
!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4361 = torch.aten.view %4357, %4360 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4361, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_5009 = torch.constant.int 4 %4362 = torch.aten.mul.int %int4_5009, %425 : !torch.int, !torch.int -> !torch.int %int32_5010 = torch.constant.int 32 %int8_5011 = torch.constant.int 8 %int128_5012 = torch.constant.int 128 %4363 = torch.prim.ListConstruct %4362, %int32_5010, %int8_5011, %int128_5012 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4364 = torch.aten.view %4361, %4363 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4364, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_5013 = torch.constant.int 4 %4365 = torch.aten.mul.int %int4_5013, %425 : !torch.int, !torch.int -> !torch.int %4366 = torch.prim.ListConstruct %4365 : (!torch.int) -> !torch.list<int> %4367 = torch.aten.view %4359, %4366 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4367, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_5014 = torch.constant.int 32 %int2_5015 = torch.constant.int 2 %int32_5016 = torch.constant.int 32 %int8_5017 = torch.constant.int 8 %int128_5018 = torch.constant.int 128 %4368 = torch.prim.ListConstruct %416, %int32_5014, %int2_5015, %int32_5016, %int8_5017, %int128_5018 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4369 = torch.aten.view %4201, %4368 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4369, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_5019 = torch.constant.int 32 %4370 = torch.aten.mul.int %416, %int32_5019 : !torch.int, !torch.int -> !torch.int %int2_5020 = torch.constant.int 2 %4371 = torch.aten.mul.int %4370, %int2_5020 : !torch.int, !torch.int -> !torch.int %int32_5021 = torch.constant.int 32 %int8_5022 = torch.constant.int 8 %int128_5023 = torch.constant.int 128 %4372 = torch.prim.ListConstruct %4371, %int32_5021, %int8_5022, %int128_5023 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4373 = torch.aten.view %4369, %4372 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4373, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %4374 = torch.prim.ListConstruct %4367 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_5024 = torch.constant.bool false %4375 = torch.aten.index_put %4373, %4374, %4364, %false_5024 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4375, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_5025 = torch.constant.int 32 %int2_5026 = torch.constant.int 2 %int32_5027 = torch.constant.int 32 %int8_5028 = torch.constant.int 8 %int128_5029 = torch.constant.int 128 %4376 = torch.prim.ListConstruct %416, %int32_5025, %int2_5026, %int32_5027, %int8_5028, %int128_5029 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> 
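// ---------------------------------------------------------------------------
// Annotation (reader's note): this block is the paged KV-cache update. Each
// flat cache page of 2097152 f16 elements is viewed as
//   [layers=32, K/V=2, block_seq=32, kv_heads=8, head_dim=128]
// (32 x 2 x 32 x 8 x 128 = 2097152), and the linearized slot index computed
// above, arg2 * 64 + 40, addresses the K slab of one layer (40 = 2 * 20, i.e.
// presumably layer 20, assuming two slabs per layer). The index_put above
// scatters the freshly rotated keys into that slab; the mirrored sequence
// below uses offset + 1 to write the value states into the adjacent V slab.
// ---------------------------------------------------------------------------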
%4377 = torch.aten.view %4375, %4376 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4377, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_5030 = torch.constant.int 2097152 %4378 = torch.prim.ListConstruct %416, %int2097152_5030 : (!torch.int, !torch.int) -> !torch.list<int> %4379 = torch.aten.view %4377, %4378 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4379, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_5031 = torch.constant.int 32 %int2_5032 = torch.constant.int 2 %int32_5033 = torch.constant.int 32 %int8_5034 = torch.constant.int 8 %int128_5035 = torch.constant.int 128 %4380 = torch.prim.ListConstruct %416, %int32_5031, %int2_5032, %int32_5033, %int8_5034, %int128_5035 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4381 = torch.aten.view %4379, %4380 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4381, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_5036 = torch.constant.int 32 %int8_5037 = torch.constant.int 8 %int128_5038 = torch.constant.int 128 %4382 = torch.prim.ListConstruct %4371, %int32_5036, %int8_5037, %int128_5038 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4383 = torch.aten.view %4381, %4382 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4383, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_5039 = torch.constant.int 4 %int32_5040 = torch.constant.int 32 %int8_5041 = torch.constant.int 8 %int128_5042 = torch.constant.int 128 %4384 = torch.prim.ListConstruct %int4_5039, %425, %int32_5040, %int8_5041, %int128_5042 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4385 = torch.aten.view %4299, %4384 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4385, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_5043 = torch.constant.int 4 %4386 = torch.aten.mul.int %int4_5043, %425 : !torch.int, !torch.int -> !torch.int %int32_5044 = torch.constant.int 32 %int8_5045 = torch.constant.int 8 %int128_5046 = torch.constant.int 128 %4387 = torch.prim.ListConstruct %4386, %int32_5044, %int8_5045, %int128_5046 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4388 = torch.aten.view %4385, %4387 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4388, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_5047 = torch.constant.int 1 %int1_5048 = torch.constant.int 1 %4389 = torch.aten.add.Scalar %4359, %int1_5047, %int1_5048 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4389, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_5049 = torch.constant.int 4 %4390 = torch.aten.mul.int %int4_5049, %425 : !torch.int, !torch.int -> !torch.int %4391 = torch.prim.ListConstruct %4390 : (!torch.int) -> !torch.list<int> %4392 = torch.aten.view %4389, %4391 : !torch.vtensor<[4,?],si64>, 
!torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4392, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %4393 = torch.prim.ListConstruct %4392 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_5050 = torch.constant.bool false %4394 = torch.aten.index_put %4383, %4393, %4388, %false_5050 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4394, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_5051 = torch.constant.int 32 %int2_5052 = torch.constant.int 2 %int32_5053 = torch.constant.int 32 %int8_5054 = torch.constant.int 8 %int128_5055 = torch.constant.int 128 %4395 = torch.prim.ListConstruct %416, %int32_5051, %int2_5052, %int32_5053, %int8_5054, %int128_5055 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4396 = torch.aten.view %4394, %4395 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4396, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_5056 = torch.constant.int 2097152 %4397 = torch.prim.ListConstruct %416, %int2097152_5056 : (!torch.int, !torch.int) -> !torch.list<int> %4398 = torch.aten.view %4396, %4397 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4398, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_5057 = torch.constant.int -2 %4399 = torch.aten.unsqueeze %4357, %int-2_5057 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4399, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_5058 = torch.constant.int 4 %int8_5059 = torch.constant.int 8 %int4_5060 = torch.constant.int 4 %int128_5061 = torch.constant.int 128 %4400 = torch.prim.ListConstruct %int4_5058, %4343, %int8_5059, %int4_5060, %int128_5061 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_5062 = torch.constant.bool false %4401 = torch.aten.expand %4399, %4400, %false_5062 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4401, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_5063 = torch.constant.int 0 %4402 = torch.aten.clone %4401, %int0_5063 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4402, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_5064 = torch.constant.int 4 %int32_5065 = torch.constant.int 32 %int128_5066 = torch.constant.int 128 %4403 = torch.prim.ListConstruct %int4_5064, %4343, %int32_5065, %int128_5066 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4404 = torch.aten._unsafe_view %4402, %4403 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4404, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_5067 = torch.constant.int -2 %4405 = torch.aten.unsqueeze %4299, %int-2_5067 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape 
%4405, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_5068 = torch.constant.int 1 %4406 = torch.aten.size.int %4293, %int1_5068 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_5069 = torch.constant.int 4 %int8_5070 = torch.constant.int 8 %int4_5071 = torch.constant.int 4 %int128_5072 = torch.constant.int 128 %4407 = torch.prim.ListConstruct %int4_5069, %4406, %int8_5070, %int4_5071, %int128_5072 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_5073 = torch.constant.bool false %4408 = torch.aten.expand %4405, %4407, %false_5073 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4408, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_5074 = torch.constant.int 0 %4409 = torch.aten.clone %4408, %int0_5074 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4409, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_5075 = torch.constant.int 4 %int32_5076 = torch.constant.int 32 %int128_5077 = torch.constant.int 128 %4410 = torch.prim.ListConstruct %int4_5075, %4406, %int32_5076, %int128_5077 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4411 = torch.aten._unsafe_view %4409, %4410 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4411, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_5078 = torch.constant.int 1 %int2_5079 = torch.constant.int 2 %4412 = torch.aten.transpose.int %4328, %int1_5078, %int2_5079 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4412, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5080 = torch.constant.int 1 %int2_5081 = torch.constant.int 2 %4413 = torch.aten.transpose.int %4404, %int1_5080, %int2_5081 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4413, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5082 = torch.constant.int 1 %int2_5083 = torch.constant.int 2 %4414 = torch.aten.transpose.int %4411, %int1_5082, %int2_5083 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4414, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_5084 = torch.constant.float 0.000000e+00 %false_5085 = torch.constant.bool false %none_5086 = torch.constant.none %4415:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%4412, %4413, %4414, %float0.000000e00_5084, %false_5085, %320, %none_5086) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %4415#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5087 = torch.constant.int 1 %int2_5088 = torch.constant.int 2 %4416 = torch.aten.transpose.int %4415#0, %int1_5087, %int2_5088 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> 
!torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4416, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_5089 = torch.constant.int 4 %int4096_5090 = torch.constant.int 4096 %4417 = torch.prim.ListConstruct %int4_5089, %4314, %int4096_5090 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4418 = torch.aten.view %4416, %4417 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4418, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5091 = torch.constant.int -2 %int-1_5092 = torch.constant.int -1 %4419 = torch.aten.transpose.int %185, %int-2_5091, %int-1_5092 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_5093 = torch.constant.int 4 %4420 = torch.aten.mul.int %int4_5093, %4314 : !torch.int, !torch.int -> !torch.int %int4096_5094 = torch.constant.int 4096 %4421 = torch.prim.ListConstruct %4420, %int4096_5094 : (!torch.int, !torch.int) -> !torch.list<int> %4422 = torch.aten.view %4418, %4421 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4422, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4423 = torch.aten.mm %4422, %4419 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4423, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5095 = torch.constant.int 4 %int4096_5096 = torch.constant.int 4096 %4424 = torch.prim.ListConstruct %int4_5095, %4314, %int4096_5096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4425 = torch.aten.view %4423, %4424 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4425, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_5097 = torch.constant.int 1 %4426 = torch.aten.add.Tensor %4263, %4425, %int1_5097 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4426, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_5098 = torch.constant.int 6 %4427 = torch.prims.convert_element_type %4426, %int6_5098 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4427, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_5099 = torch.constant.int 2 %4428 = torch.aten.pow.Tensor_Scalar %4427, %int2_5099 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4428, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_5100 = torch.constant.int -1 %4429 = torch.prim.ListConstruct %int-1_5100 : (!torch.int) -> !torch.list<int> %true_5101 = torch.constant.bool true %none_5102 = torch.constant.none %4430 = torch.aten.mean.dim %4428, %4429, %true_5101, %none_5102 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4430, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_5103 = torch.constant.float 9.9999997473787516E-6 %int1_5104 = torch.constant.int 1 %4431 = torch.aten.add.Scalar %4430, %float9.999990e-06_5103, %int1_5104 : !torch.vtensor<[4,?,1],f32>, 
!torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4431, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4432 = torch.aten.rsqrt %4431 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4432, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4433 = torch.aten.mul.Tensor %4427, %4432 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4433, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4434 = torch.aten.mul.Tensor %186, %4433 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4434, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_5105 = torch.constant.int 5 %4435 = torch.prims.convert_element_type %4434, %int5_5105 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4435, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5106 = torch.constant.int -2 %int-1_5107 = torch.constant.int -1 %4436 = torch.aten.transpose.int %187, %int-2_5106, %int-1_5107 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_5108 = torch.constant.int 4 %4437 = torch.aten.mul.int %int4_5108, %294 : !torch.int, !torch.int -> !torch.int %int4096_5109 = torch.constant.int 4096 %4438 = torch.prim.ListConstruct %4437, %int4096_5109 : (!torch.int, !torch.int) -> !torch.list<int> %4439 = torch.aten.view %4435, %4438 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4439, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4440 = torch.aten.mm %4439, %4436 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4440, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_5110 = torch.constant.int 4 %int14336_5111 = torch.constant.int 14336 %4441 = torch.prim.ListConstruct %int4_5110, %294, %int14336_5111 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4442 = torch.aten.view %4440, %4441 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4442, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4443 = torch.aten.silu %4442 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4443, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_5112 = torch.constant.int -2 %int-1_5113 = torch.constant.int -1 %4444 = torch.aten.transpose.int %188, %int-2_5112, %int-1_5113 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_5114 = torch.constant.int 4 %4445 = torch.aten.mul.int %int4_5114, %294 : !torch.int, !torch.int -> !torch.int %int4096_5115 = torch.constant.int 4096 %4446 = torch.prim.ListConstruct %4445, %int4096_5115 : (!torch.int, !torch.int) -> !torch.list<int> %4447 = torch.aten.view %4435, %4446 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4447, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4448 = torch.aten.mm %4447, %4444 : 
!torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4448, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_5116 = torch.constant.int 4 %int14336_5117 = torch.constant.int 14336 %4449 = torch.prim.ListConstruct %int4_5116, %294, %int14336_5117 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4450 = torch.aten.view %4448, %4449 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4450, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4451 = torch.aten.mul.Tensor %4443, %4450 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4451, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_5118 = torch.constant.int -2 %int-1_5119 = torch.constant.int -1 %4452 = torch.aten.transpose.int %189, %int-2_5118, %int-1_5119 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_5120 = torch.constant.int 1 %4453 = torch.aten.size.int %4442, %int1_5120 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_5121 = torch.constant.int 4 %4454 = torch.aten.mul.int %int4_5121, %4453 : !torch.int, !torch.int -> !torch.int %int14336_5122 = torch.constant.int 14336 %4455 = torch.prim.ListConstruct %4454, %int14336_5122 : (!torch.int, !torch.int) -> !torch.list<int> %4456 = torch.aten.view %4451, %4455 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4456, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %4457 = torch.aten.mm %4456, %4452 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4457, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5123 = torch.constant.int 4 %int4096_5124 = torch.constant.int 4096 %4458 = torch.prim.ListConstruct %int4_5123, %4453, %int4096_5124 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4459 = torch.aten.view %4457, %4458 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4459, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_5125 = torch.constant.int 1 %4460 = torch.aten.add.Tensor %4426, %4459, %int1_5125 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4460, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_5126 = torch.constant.int 6 %4461 = torch.prims.convert_element_type %4460, %int6_5126 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4461, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_5127 = torch.constant.int 2 %4462 = torch.aten.pow.Tensor_Scalar %4461, %int2_5127 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4462, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_5128 = torch.constant.int -1 %4463 = torch.prim.ListConstruct %int-1_5128 : (!torch.int) -> !torch.list<int> %true_5129 = torch.constant.bool true %none_5130 = torch.constant.none %4464 = 
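// NOTE (annotation): %4460 adds the FFN output back onto the residual stream, closing this transformer block; %4461 onward begins the next block with the same RMSNorm pattern (cast to f32, square, mean over the last dim, +eps, rsqrt, scale by the weight %190).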
torch.aten.mean.dim %4462, %4463, %true_5129, %none_5130 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4464, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_5131 = torch.constant.float 9.9999997473787516E-6 %int1_5132 = torch.constant.int 1 %4465 = torch.aten.add.Scalar %4464, %float9.999990e-06_5131, %int1_5132 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4465, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4466 = torch.aten.rsqrt %4465 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4466, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4467 = torch.aten.mul.Tensor %4461, %4466 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4467, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4468 = torch.aten.mul.Tensor %190, %4467 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4468, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_5133 = torch.constant.int 5 %4469 = torch.prims.convert_element_type %4468, %int5_5133 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4469, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5134 = torch.constant.int -2 %int-1_5135 = torch.constant.int -1 %4470 = torch.aten.transpose.int %191, %int-2_5134, %int-1_5135 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_5136 = torch.constant.int 4 %4471 = torch.aten.mul.int %int4_5136, %294 : !torch.int, !torch.int -> !torch.int %int4096_5137 = torch.constant.int 4096 %4472 = torch.prim.ListConstruct %4471, %int4096_5137 : (!torch.int, !torch.int) -> !torch.list<int> %4473 = torch.aten.view %4469, %4472 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4473, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4474 = torch.aten.mm %4473, %4470 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4474, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5138 = torch.constant.int 4 %int4096_5139 = torch.constant.int 4096 %4475 = torch.prim.ListConstruct %int4_5138, %294, %int4096_5139 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4476 = torch.aten.view %4474, %4475 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4476, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5140 = torch.constant.int -2 %int-1_5141 = torch.constant.int -1 %4477 = torch.aten.transpose.int %192, %int-2_5140, %int-1_5141 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_5142 = torch.constant.int 4 %4478 = torch.aten.mul.int %int4_5142, %294 : !torch.int, !torch.int -> !torch.int %int4096_5143 = torch.constant.int 4096 %4479 = torch.prim.ListConstruct %4478, %int4096_5143 : (!torch.int, !torch.int) -> !torch.list<int> %4480 = torch.aten.view %4469, 
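// NOTE (annotation): %4470 onward computes the attention projections for this block: the 4096x4096 weight %191 for q, and the 1024x4096 weights %192/%193 for k and v, i.e. 32 query heads vs. 8 KV heads of dim 128 (grouped-query attention, 4:1). Sketch (hypothetical names): q = x @ wq.T; k = x @ wk.T; v = x @ wv.T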
%4479 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4480, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4481 = torch.aten.mm %4480, %4477 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4481, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_5144 = torch.constant.int 4 %int1024_5145 = torch.constant.int 1024 %4482 = torch.prim.ListConstruct %int4_5144, %294, %int1024_5145 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4483 = torch.aten.view %4481, %4482 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4483, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_5146 = torch.constant.int -2 %int-1_5147 = torch.constant.int -1 %4484 = torch.aten.transpose.int %193, %int-2_5146, %int-1_5147 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_5148 = torch.constant.int 4 %4485 = torch.aten.mul.int %int4_5148, %294 : !torch.int, !torch.int -> !torch.int %int4096_5149 = torch.constant.int 4096 %4486 = torch.prim.ListConstruct %4485, %int4096_5149 : (!torch.int, !torch.int) -> !torch.list<int> %4487 = torch.aten.view %4469, %4486 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4487, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4488 = torch.aten.mm %4487, %4484 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4488, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_5150 = torch.constant.int 4 %int1024_5151 = torch.constant.int 1024 %4489 = torch.prim.ListConstruct %int4_5150, %294, %int1024_5151 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4490 = torch.aten.view %4488, %4489 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4490, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_5152 = torch.constant.int 4 %int32_5153 = torch.constant.int 32 %int128_5154 = torch.constant.int 128 %4491 = torch.prim.ListConstruct %int4_5152, %294, %int32_5153, %int128_5154 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4492 = torch.aten.view %4476, %4491 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4492, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_5155 = torch.constant.int 4 %int8_5156 = torch.constant.int 8 %int128_5157 = torch.constant.int 128 %4493 = torch.prim.ListConstruct %int4_5155, %294, %int8_5156, %int128_5157 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4494 = torch.aten.view %4483, %4493 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4494, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_5158 = torch.constant.int 4 %int8_5159 = torch.constant.int 8 %int128_5160 = torch.constant.int 128 %4495 = torch.prim.ListConstruct %int4_5158, %294, %int8_5159, %int128_5160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4496 = torch.aten.view %4490, %4495 : 
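// NOTE (annotation): %4492-%4496 (continuing below) split the projections into heads: q -> [4, seq, 32, 128], k and v -> [4, seq, 8, 128]; in PyTorch terms roughly q.view(4, seq, 32, 128), etc.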
!torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4496, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_5161 = torch.constant.int 131072 %none_5162 = torch.constant.none %none_5163 = torch.constant.none %cpu_5164 = torch.constant.device "cpu" %false_5165 = torch.constant.bool false %4497 = torch.aten.arange %int131072_5161, %none_5162, %none_5163, %cpu_5164, %false_5165 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_5166 = torch.constant.int 0 %int128_5167 = torch.constant.int 128 %int2_5168 = torch.constant.int 2 %none_5169 = torch.constant.none %none_5170 = torch.constant.none %cpu_5171 = torch.constant.device "cpu" %false_5172 = torch.constant.bool false %4498 = torch.aten.arange.start_step %int0_5166, %int128_5167, %int2_5168, %none_5169, %none_5170, %cpu_5171, %false_5172 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_5173 = torch.constant.int 0 %int0_5174 = torch.constant.int 0 %int64_5175 = torch.constant.int 64 %int1_5176 = torch.constant.int 1 %4499 = torch.aten.slice.Tensor %4498, %int0_5173, %int0_5174, %int64_5175, %int1_5176 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_5177 = torch.constant.int 6 %4500 = torch.prims.convert_element_type %4499, %int6_5177 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_5178 = torch.constant.int 128 %4501 = torch.aten.div.Scalar %4500, %int128_5178 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_5179 = torch.constant.float 5.000000e+05 %4502 = torch.aten.pow.Scalar %float5.000000e05_5179, %4501 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4503 = torch.aten.reciprocal %4502 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_5180 = torch.constant.float 1.000000e+00 %4504 = torch.aten.mul.Scalar %4503, %float1.000000e00_5180 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_5181 = torch.constant.int 131072 %int1_5182 = torch.constant.int 1 %4505 = torch.prim.ListConstruct %int131072_5181, %int1_5182 : (!torch.int, !torch.int) -> !torch.list<int> %4506 = torch.aten.view %4497, %4505 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4507 = torch.aten.mul.Tensor %4506, %4504 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4508 = torch.aten.cos %4507 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4509 = torch.aten.sin %4507 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4510 = torch.aten.complex %4508, %4509 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_5183 = torch.constant.int 1 %4511 = torch.aten.size.int %4476, %int1_5183 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_5184 = torch.constant.int 0 %4512 = torch.aten.add.int %int0_5184, %4511 : !torch.int, !torch.int -> !torch.int %int0_5185 = torch.constant.int 0 %int0_5186 = torch.constant.int 0 %int1_5187 = torch.constant.int 1 %4513 = torch.aten.slice.Tensor %4510, %int0_5185, %int0_5186, %4512, %int1_5187 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> 
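// NOTE (annotation): %4497-%4517 build the rotary-embedding table: positions 0..131071, inverse frequencies 1 / (500000 ^ (arange(0, 128, 2) / 128)), angles as the outer product, then cos + i*sin, sliced to the first seq rows and broadcast to [1, seq, 1, 64]. Illustrative sketch (hypothetical names, assuming this reading):
//   t = torch.arange(131072).float()
//   inv_freq = 1.0 / (500000.0 ** (torch.arange(0, 128, 2).float() / 128))
//   cis = torch.polar(torch.ones(131072, 64), torch.outer(t, inv_freq))  # complex64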
torch.bind_symbolic_shape %4513, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_5188 = torch.constant.int 1 %int0_5189 = torch.constant.int 0 %int9223372036854775807_5190 = torch.constant.int 9223372036854775807 %int1_5191 = torch.constant.int 1 %4514 = torch.aten.slice.Tensor %4513, %int1_5188, %int0_5189, %int9223372036854775807_5190, %int1_5191 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4514, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_5192 = torch.constant.int 0 %4515 = torch.aten.unsqueeze %4514, %int0_5192 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4515, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_5193 = torch.constant.int 2 %4516 = torch.aten.unsqueeze %4515, %int2_5193 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4516, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_5194 = torch.constant.int 3 %int0_5195 = torch.constant.int 0 %int9223372036854775807_5196 = torch.constant.int 9223372036854775807 %int1_5197 = torch.constant.int 1 %4517 = torch.aten.slice.Tensor %4516, %int3_5194, %int0_5195, %int9223372036854775807_5196, %int1_5197 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4517, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4518 = torch_c.to_builtin_tensor %4492 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_5198 = arith.constant 1 : index %dim_5199 = tensor.dim %4518, %c1_5198 : tensor<4x?x32x128xf16> %4519 = flow.tensor.bitcast %4518 : tensor<4x?x32x128xf16>{%dim_5199} -> tensor<4x?x32x64xcomplex<f16>>{%dim_5199} %4520 = torch_c.from_builtin_tensor %4519 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %4520, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %4521 = torch.aten.mul.Tensor %4520, %4517 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %4521, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %4522 = torch_c.to_builtin_tensor %4521 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_5200 = arith.constant 1 : index %dim_5201 = tensor.dim %4522, %c1_5200 : tensor<4x?x32x64xcomplex<f32>> %4523 = flow.tensor.bitcast %4522 : tensor<4x?x32x64xcomplex<f32>>{%dim_5201} -> tensor<4x?x32x128xf32>{%dim_5201} %4524 = torch_c.from_builtin_tensor %4523 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %4524, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_5202 = torch.constant.int 5 %4525 = torch.prims.convert_element_type %4524, %int5_5202 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4525, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_5203 = torch.constant.int 131072 %none_5204 = torch.constant.none 
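// NOTE (annotation): %4518-%4525 apply RoPE to q: flow.tensor.bitcast reinterprets each adjacent f16 pair as one complex<f16> lane, the complex multiply with the table promotes to complex<f32>, and the result is bitcast back and truncated to f16. Roughly:
//   q_c = torch.view_as_complex(q.float().reshape(4, seq, 32, 64, 2))
//   q = torch.view_as_real(q_c * cis[None, :, None, :]).flatten(-2).half()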
%none_5205 = torch.constant.none %cpu_5206 = torch.constant.device "cpu" %false_5207 = torch.constant.bool false %4526 = torch.aten.arange %int131072_5203, %none_5204, %none_5205, %cpu_5206, %false_5207 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_5208 = torch.constant.int 0 %int128_5209 = torch.constant.int 128 %int2_5210 = torch.constant.int 2 %none_5211 = torch.constant.none %none_5212 = torch.constant.none %cpu_5213 = torch.constant.device "cpu" %false_5214 = torch.constant.bool false %4527 = torch.aten.arange.start_step %int0_5208, %int128_5209, %int2_5210, %none_5211, %none_5212, %cpu_5213, %false_5214 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_5215 = torch.constant.int 0 %int0_5216 = torch.constant.int 0 %int64_5217 = torch.constant.int 64 %int1_5218 = torch.constant.int 1 %4528 = torch.aten.slice.Tensor %4527, %int0_5215, %int0_5216, %int64_5217, %int1_5218 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_5219 = torch.constant.int 6 %4529 = torch.prims.convert_element_type %4528, %int6_5219 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_5220 = torch.constant.int 128 %4530 = torch.aten.div.Scalar %4529, %int128_5220 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_5221 = torch.constant.float 5.000000e+05 %4531 = torch.aten.pow.Scalar %float5.000000e05_5221, %4530 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4532 = torch.aten.reciprocal %4531 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_5222 = torch.constant.float 1.000000e+00 %4533 = torch.aten.mul.Scalar %4532, %float1.000000e00_5222 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_5223 = torch.constant.int 131072 %int1_5224 = torch.constant.int 1 %4534 = torch.prim.ListConstruct %int131072_5223, %int1_5224 : (!torch.int, !torch.int) -> !torch.list<int> %4535 = torch.aten.view %4526, %4534 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4536 = torch.aten.mul.Tensor %4535, %4533 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4537 = torch.aten.cos %4536 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4538 = torch.aten.sin %4536 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4539 = torch.aten.complex %4537, %4538 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_5225 = torch.constant.int 1 %4540 = torch.aten.size.int %4483, %int1_5225 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_5226 = torch.constant.int 0 %4541 = torch.aten.add.int %int0_5226, %4540 : !torch.int, !torch.int -> !torch.int %int0_5227 = torch.constant.int 0 %int0_5228 = torch.constant.int 0 %int1_5229 = torch.constant.int 1 %4542 = torch.aten.slice.Tensor %4539, %int0_5227, %int0_5228, %4541, %int1_5229 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4542, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_5230 = torch.constant.int 1 %int0_5231 = torch.constant.int 0 %int9223372036854775807_5232 = torch.constant.int 9223372036854775807 %int1_5233 = 
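// NOTE (annotation): %4526 onward recomputes the identical rotary table for k; the exporter appears to emit it once per use, presumably relying on later CSE to deduplicate.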
torch.constant.int 1 %4543 = torch.aten.slice.Tensor %4542, %int1_5230, %int0_5231, %int9223372036854775807_5232, %int1_5233 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4543, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_5234 = torch.constant.int 0 %4544 = torch.aten.unsqueeze %4543, %int0_5234 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4544, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_5235 = torch.constant.int 2 %4545 = torch.aten.unsqueeze %4544, %int2_5235 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4545, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_5236 = torch.constant.int 3 %int0_5237 = torch.constant.int 0 %int9223372036854775807_5238 = torch.constant.int 9223372036854775807 %int1_5239 = torch.constant.int 1 %4546 = torch.aten.slice.Tensor %4545, %int3_5236, %int0_5237, %int9223372036854775807_5238, %int1_5239 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4546, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4547 = torch_c.to_builtin_tensor %4494 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_5240 = arith.constant 1 : index %dim_5241 = tensor.dim %4547, %c1_5240 : tensor<4x?x8x128xf16> %4548 = flow.tensor.bitcast %4547 : tensor<4x?x8x128xf16>{%dim_5241} -> tensor<4x?x8x64xcomplex<f16>>{%dim_5241} %4549 = torch_c.from_builtin_tensor %4548 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %4549, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %4550 = torch.aten.mul.Tensor %4549, %4546 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %4550, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %4551 = torch_c.to_builtin_tensor %4550 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_5242 = arith.constant 1 : index %dim_5243 = tensor.dim %4551, %c1_5242 : tensor<4x?x8x64xcomplex<f32>> %4552 = flow.tensor.bitcast %4551 : tensor<4x?x8x64xcomplex<f32>>{%dim_5243} -> tensor<4x?x8x128xf32>{%dim_5243} %4553 = torch_c.from_builtin_tensor %4552 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %4553, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_5244 = torch.constant.int 5 %4554 = torch.prims.convert_element_type %4553, %int5_5244 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4554, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_5245 = torch.constant.int 64 %4555 = torch.aten.mul.Scalar %arg2, %int64_5245 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4555, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int42 = torch.constant.int 42 %int1_5246 = torch.constant.int 1 %4556 = torch.aten.add.Scalar %4555, %int42, 
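// NOTE (annotation): %4555/%4556 turn the page ids in %arg2 into flat KV-cache slots: page * 64 + 42. Given the cache view [pages, 32, 2, 32, 8, 128] below, the per-page stride is 32 * 2 = 64, so 42 = 2*21 appears to address the k entry of layer 21 (the matching v entry, at +1, is written further down).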
%int1_5246 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4556, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_5247 = torch.constant.int 4 %int32_5248 = torch.constant.int 32 %int8_5249 = torch.constant.int 8 %int128_5250 = torch.constant.int 128 %4557 = torch.prim.ListConstruct %int4_5247, %425, %int32_5248, %int8_5249, %int128_5250 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4558 = torch.aten.view %4554, %4557 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4558, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_5251 = torch.constant.int 4 %4559 = torch.aten.mul.int %int4_5251, %425 : !torch.int, !torch.int -> !torch.int %int32_5252 = torch.constant.int 32 %int8_5253 = torch.constant.int 8 %int128_5254 = torch.constant.int 128 %4560 = torch.prim.ListConstruct %4559, %int32_5252, %int8_5253, %int128_5254 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4561 = torch.aten.view %4558, %4560 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4561, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_5255 = torch.constant.int 4 %4562 = torch.aten.mul.int %int4_5255, %425 : !torch.int, !torch.int -> !torch.int %4563 = torch.prim.ListConstruct %4562 : (!torch.int) -> !torch.list<int> %4564 = torch.aten.view %4556, %4563 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4564, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_5256 = torch.constant.int 32 %int2_5257 = torch.constant.int 2 %int32_5258 = torch.constant.int 32 %int8_5259 = torch.constant.int 8 %int128_5260 = torch.constant.int 128 %4565 = torch.prim.ListConstruct %416, %int32_5256, %int2_5257, %int32_5258, %int8_5259, %int128_5260 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4566 = torch.aten.view %4398, %4565 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4566, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_5261 = torch.constant.int 32 %4567 = torch.aten.mul.int %416, %int32_5261 : !torch.int, !torch.int -> !torch.int %int2_5262 = torch.constant.int 2 %4568 = torch.aten.mul.int %4567, %int2_5262 : !torch.int, !torch.int -> !torch.int %int32_5263 = torch.constant.int 32 %int8_5264 = torch.constant.int 8 %int128_5265 = torch.constant.int 128 %4569 = torch.prim.ListConstruct %4568, %int32_5263, %int8_5264, %int128_5265 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4570 = torch.aten.view %4566, %4569 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4570, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %4571 = torch.prim.ListConstruct %4564 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_5266 = torch.constant.bool false %4572 = torch.aten.index_put %4570, %4571, %4561, %false_5266 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4572, [%293], 
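// NOTE (annotation): %4566-%4572 view the flat cache %4398 ([pages, 2097152]) as [pages, 32, 2, 32, 8, 128], flatten the leading dims, and index_put the rotated k pages at the slots above. Rough sketch (hypothetical names):
//   cache.view(-1, 32, 8, 128)[slots] = k_pages   # slots = page*64 + 2*layer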
affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_5267 = torch.constant.int 32 %int2_5268 = torch.constant.int 2 %int32_5269 = torch.constant.int 32 %int8_5270 = torch.constant.int 8 %int128_5271 = torch.constant.int 128 %4573 = torch.prim.ListConstruct %416, %int32_5267, %int2_5268, %int32_5269, %int8_5270, %int128_5271 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4574 = torch.aten.view %4572, %4573 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4574, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_5272 = torch.constant.int 2097152 %4575 = torch.prim.ListConstruct %416, %int2097152_5272 : (!torch.int, !torch.int) -> !torch.list<int> %4576 = torch.aten.view %4574, %4575 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4576, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_5273 = torch.constant.int 32 %int2_5274 = torch.constant.int 2 %int32_5275 = torch.constant.int 32 %int8_5276 = torch.constant.int 8 %int128_5277 = torch.constant.int 128 %4577 = torch.prim.ListConstruct %416, %int32_5273, %int2_5274, %int32_5275, %int8_5276, %int128_5277 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4578 = torch.aten.view %4576, %4577 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4578, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_5278 = torch.constant.int 32 %int8_5279 = torch.constant.int 8 %int128_5280 = torch.constant.int 128 %4579 = torch.prim.ListConstruct %4568, %int32_5278, %int8_5279, %int128_5280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4580 = torch.aten.view %4578, %4579 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4580, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_5281 = torch.constant.int 4 %int32_5282 = torch.constant.int 32 %int8_5283 = torch.constant.int 8 %int128_5284 = torch.constant.int 128 %4581 = torch.prim.ListConstruct %int4_5281, %425, %int32_5282, %int8_5283, %int128_5284 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4582 = torch.aten.view %4496, %4581 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4582, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_5285 = torch.constant.int 4 %4583 = torch.aten.mul.int %int4_5285, %425 : !torch.int, !torch.int -> !torch.int %int32_5286 = torch.constant.int 32 %int8_5287 = torch.constant.int 8 %int128_5288 = torch.constant.int 128 %4584 = torch.prim.ListConstruct %4583, %int32_5286, %int8_5287, %int128_5288 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4585 = torch.aten.view %4582, %4584 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4585, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_5289 = torch.constant.int 1 %int1_5290 = torch.constant.int 1 %4586 = torch.aten.add.Scalar %4556, %int1_5289, %int1_5290 : 
!torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4586, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_5291 = torch.constant.int 4 %4587 = torch.aten.mul.int %int4_5291, %425 : !torch.int, !torch.int -> !torch.int %4588 = torch.prim.ListConstruct %4587 : (!torch.int) -> !torch.list<int> %4589 = torch.aten.view %4586, %4588 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4589, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %4590 = torch.prim.ListConstruct %4589 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_5292 = torch.constant.bool false %4591 = torch.aten.index_put %4580, %4590, %4585, %false_5292 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4591, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_5293 = torch.constant.int 32 %int2_5294 = torch.constant.int 2 %int32_5295 = torch.constant.int 32 %int8_5296 = torch.constant.int 8 %int128_5297 = torch.constant.int 128 %4592 = torch.prim.ListConstruct %416, %int32_5293, %int2_5294, %int32_5295, %int8_5296, %int128_5297 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4593 = torch.aten.view %4591, %4592 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4593, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_5298 = torch.constant.int 2097152 %4594 = torch.prim.ListConstruct %416, %int2097152_5298 : (!torch.int, !torch.int) -> !torch.list<int> %4595 = torch.aten.view %4593, %4594 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4595, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_5299 = torch.constant.int -2 %4596 = torch.aten.unsqueeze %4554, %int-2_5299 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4596, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_5300 = torch.constant.int 4 %int8_5301 = torch.constant.int 8 %int4_5302 = torch.constant.int 4 %int128_5303 = torch.constant.int 128 %4597 = torch.prim.ListConstruct %int4_5300, %4540, %int8_5301, %int4_5302, %int128_5303 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_5304 = torch.constant.bool false %4598 = torch.aten.expand %4596, %4597, %false_5304 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4598, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_5305 = torch.constant.int 0 %4599 = torch.aten.clone %4598, %int0_5305 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4599, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_5306 = torch.constant.int 4 %int32_5307 = torch.constant.int 32 %int128_5308 = torch.constant.int 128 %4600 = torch.prim.ListConstruct %int4_5306, %4540, %int32_5307, %int128_5308 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4601 = 
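// NOTE (annotation): %4586-%4595 repeat the cache write for v at slot +1 and restore the [pages, 2097152] view. %4596 onward expands the 8 KV heads to 32 for attention (repeat_kv with n_rep = 4), roughly:
//   k = k[:, :, :, None, :].expand(4, seq, 8, 4, 128).reshape(4, seq, 32, 128)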
torch.aten._unsafe_view %4599, %4600 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4601, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_5309 = torch.constant.int -2 %4602 = torch.aten.unsqueeze %4496, %int-2_5309 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4602, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_5310 = torch.constant.int 1 %4603 = torch.aten.size.int %4490, %int1_5310 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_5311 = torch.constant.int 4 %int8_5312 = torch.constant.int 8 %int4_5313 = torch.constant.int 4 %int128_5314 = torch.constant.int 128 %4604 = torch.prim.ListConstruct %int4_5311, %4603, %int8_5312, %int4_5313, %int128_5314 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_5315 = torch.constant.bool false %4605 = torch.aten.expand %4602, %4604, %false_5315 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4605, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_5316 = torch.constant.int 0 %4606 = torch.aten.clone %4605, %int0_5316 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4606, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_5317 = torch.constant.int 4 %int32_5318 = torch.constant.int 32 %int128_5319 = torch.constant.int 128 %4607 = torch.prim.ListConstruct %int4_5317, %4603, %int32_5318, %int128_5319 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4608 = torch.aten._unsafe_view %4606, %4607 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4608, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_5320 = torch.constant.int 1 %int2_5321 = torch.constant.int 2 %4609 = torch.aten.transpose.int %4525, %int1_5320, %int2_5321 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4609, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5322 = torch.constant.int 1 %int2_5323 = torch.constant.int 2 %4610 = torch.aten.transpose.int %4601, %int1_5322, %int2_5323 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4610, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5324 = torch.constant.int 1 %int2_5325 = torch.constant.int 2 %4611 = torch.aten.transpose.int %4608, %int1_5324, %int2_5325 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4611, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_5326 = torch.constant.float 0.000000e+00 %false_5327 = torch.constant.bool false %none_5328 = torch.constant.none %4612:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%4609, %4610, %4611, %float0.000000e00_5326, %false_5327, %320, %none_5328) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, 
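// NOTE (annotation): %4609-%4611 move heads to dim 1 ([4, 32, seq, 128]) and %4612 calls the flash-attention CPU kernel with the [4, 1, seq, seq] mask %320 and dropout 0.0, roughly F.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0) in PyTorch terms.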
!torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %4612#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5329 = torch.constant.int 1 %int2_5330 = torch.constant.int 2 %4613 = torch.aten.transpose.int %4612#0, %int1_5329, %int2_5330 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4613, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_5331 = torch.constant.int 4 %int4096_5332 = torch.constant.int 4096 %4614 = torch.prim.ListConstruct %int4_5331, %4511, %int4096_5332 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4615 = torch.aten.view %4613, %4614 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4615, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5333 = torch.constant.int -2 %int-1_5334 = torch.constant.int -1 %4616 = torch.aten.transpose.int %194, %int-2_5333, %int-1_5334 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_5335 = torch.constant.int 4 %4617 = torch.aten.mul.int %int4_5335, %4511 : !torch.int, !torch.int -> !torch.int %int4096_5336 = torch.constant.int 4096 %4618 = torch.prim.ListConstruct %4617, %int4096_5336 : (!torch.int, !torch.int) -> !torch.list<int> %4619 = torch.aten.view %4615, %4618 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4619, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4620 = torch.aten.mm %4619, %4616 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4620, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5337 = torch.constant.int 4 %int4096_5338 = torch.constant.int 4096 %4621 = torch.prim.ListConstruct %int4_5337, %4511, %int4096_5338 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4622 = torch.aten.view %4620, %4621 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4622, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_5339 = torch.constant.int 1 %4623 = torch.aten.add.Tensor %4460, %4622, %int1_5339 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4623, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_5340 = torch.constant.int 6 %4624 = torch.prims.convert_element_type %4623, %int6_5340 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4624, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_5341 = torch.constant.int 2 %4625 = torch.aten.pow.Tensor_Scalar %4624, %int2_5341 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4625, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_5342 = torch.constant.int -1 %4626 = torch.prim.ListConstruct %int-1_5342 : (!torch.int) -> !torch.list<int> %true_5343 = torch.constant.bool true %none_5344 = torch.constant.none %4627 = torch.aten.mean.dim %4625, %4626, %true_5343, 
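// NOTE (annotation): %4613-%4623 transpose the attention output back, fold heads to [4, seq, 4096], apply the output projection %194, and add the residual; %4624 onward is the same RMSNorm pattern again (weight %195).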
%none_5344 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4627, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_5345 = torch.constant.float 9.9999997473787516E-6 %int1_5346 = torch.constant.int 1 %4628 = torch.aten.add.Scalar %4627, %float9.999990e-06_5345, %int1_5346 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4628, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4629 = torch.aten.rsqrt %4628 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4629, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4630 = torch.aten.mul.Tensor %4624, %4629 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4630, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4631 = torch.aten.mul.Tensor %195, %4630 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4631, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_5347 = torch.constant.int 5 %4632 = torch.prims.convert_element_type %4631, %int5_5347 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4632, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5348 = torch.constant.int -2 %int-1_5349 = torch.constant.int -1 %4633 = torch.aten.transpose.int %196, %int-2_5348, %int-1_5349 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_5350 = torch.constant.int 4 %4634 = torch.aten.mul.int %int4_5350, %294 : !torch.int, !torch.int -> !torch.int %int4096_5351 = torch.constant.int 4096 %4635 = torch.prim.ListConstruct %4634, %int4096_5351 : (!torch.int, !torch.int) -> !torch.list<int> %4636 = torch.aten.view %4632, %4635 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4636, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4637 = torch.aten.mm %4636, %4633 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4637, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_5352 = torch.constant.int 4 %int14336_5353 = torch.constant.int 14336 %4638 = torch.prim.ListConstruct %int4_5352, %294, %int14336_5353 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4639 = torch.aten.view %4637, %4638 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4639, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4640 = torch.aten.silu %4639 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4640, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_5354 = torch.constant.int -2 %int-1_5355 = torch.constant.int -1 %4641 = torch.aten.transpose.int %197, %int-2_5354, %int-1_5355 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_5356 = torch.constant.int 4 %4642 = torch.aten.mul.int %int4_5356, %294 : !torch.int, !torch.int -> 
!torch.int %int4096_5357 = torch.constant.int 4096 %4643 = torch.prim.ListConstruct %4642, %int4096_5357 : (!torch.int, !torch.int) -> !torch.list<int> %4644 = torch.aten.view %4632, %4643 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4644, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4645 = torch.aten.mm %4644, %4641 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4645, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_5358 = torch.constant.int 4 %int14336_5359 = torch.constant.int 14336 %4646 = torch.prim.ListConstruct %int4_5358, %294, %int14336_5359 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4647 = torch.aten.view %4645, %4646 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4647, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4648 = torch.aten.mul.Tensor %4640, %4647 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4648, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_5360 = torch.constant.int -2 %int-1_5361 = torch.constant.int -1 %4649 = torch.aten.transpose.int %198, %int-2_5360, %int-1_5361 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_5362 = torch.constant.int 1 %4650 = torch.aten.size.int %4639, %int1_5362 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_5363 = torch.constant.int 4 %4651 = torch.aten.mul.int %int4_5363, %4650 : !torch.int, !torch.int -> !torch.int %int14336_5364 = torch.constant.int 14336 %4652 = torch.prim.ListConstruct %4651, %int14336_5364 : (!torch.int, !torch.int) -> !torch.list<int> %4653 = torch.aten.view %4648, %4652 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4653, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %4654 = torch.aten.mm %4653, %4649 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4654, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5365 = torch.constant.int 4 %int4096_5366 = torch.constant.int 4096 %4655 = torch.prim.ListConstruct %int4_5365, %4650, %int4096_5366 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4656 = torch.aten.view %4654, %4655 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4656, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_5367 = torch.constant.int 1 %4657 = torch.aten.add.Tensor %4623, %4656, %int1_5367 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4657, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_5368 = torch.constant.int 6 %4658 = torch.prims.convert_element_type %4657, %int6_5368 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4658, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_5369 = torch.constant.int 2 %4659 = torch.aten.pow.Tensor_Scalar 
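// NOTE (annotation): %4633-%4657 repeat the SwiGLU FFN for this block (gate %196, up %197, down %198) and close it with the residual add at %4657; %4658 onward starts the next block's RMSNorm (weight %199).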
%4658, %int2_5369 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4659, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_5370 = torch.constant.int -1 %4660 = torch.prim.ListConstruct %int-1_5370 : (!torch.int) -> !torch.list<int> %true_5371 = torch.constant.bool true %none_5372 = torch.constant.none %4661 = torch.aten.mean.dim %4659, %4660, %true_5371, %none_5372 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4661, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_5373 = torch.constant.float 9.9999997473787516E-6 %int1_5374 = torch.constant.int 1 %4662 = torch.aten.add.Scalar %4661, %float9.999990e-06_5373, %int1_5374 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4662, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4663 = torch.aten.rsqrt %4662 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4663, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4664 = torch.aten.mul.Tensor %4658, %4663 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4664, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4665 = torch.aten.mul.Tensor %199, %4664 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4665, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_5375 = torch.constant.int 5 %4666 = torch.prims.convert_element_type %4665, %int5_5375 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4666, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5376 = torch.constant.int -2 %int-1_5377 = torch.constant.int -1 %4667 = torch.aten.transpose.int %200, %int-2_5376, %int-1_5377 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_5378 = torch.constant.int 4 %4668 = torch.aten.mul.int %int4_5378, %294 : !torch.int, !torch.int -> !torch.int %int4096_5379 = torch.constant.int 4096 %4669 = torch.prim.ListConstruct %4668, %int4096_5379 : (!torch.int, !torch.int) -> !torch.list<int> %4670 = torch.aten.view %4666, %4669 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4670, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4671 = torch.aten.mm %4670, %4667 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4671, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5380 = torch.constant.int 4 %int4096_5381 = torch.constant.int 4096 %4672 = torch.prim.ListConstruct %int4_5380, %294, %int4096_5381 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4673 = torch.aten.view %4671, %4672 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4673, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5382 = torch.constant.int -2 %int-1_5383 = torch.constant.int -1 %4674 = torch.aten.transpose.int %201, 
%int-2_5382, %int-1_5383 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_5384 = torch.constant.int 4 %4675 = torch.aten.mul.int %int4_5384, %294 : !torch.int, !torch.int -> !torch.int %int4096_5385 = torch.constant.int 4096 %4676 = torch.prim.ListConstruct %4675, %int4096_5385 : (!torch.int, !torch.int) -> !torch.list<int> %4677 = torch.aten.view %4666, %4676 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4677, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4678 = torch.aten.mm %4677, %4674 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4678, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_5386 = torch.constant.int 4 %int1024_5387 = torch.constant.int 1024 %4679 = torch.prim.ListConstruct %int4_5386, %294, %int1024_5387 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4680 = torch.aten.view %4678, %4679 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4680, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_5388 = torch.constant.int -2 %int-1_5389 = torch.constant.int -1 %4681 = torch.aten.transpose.int %202, %int-2_5388, %int-1_5389 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_5390 = torch.constant.int 4 %4682 = torch.aten.mul.int %int4_5390, %294 : !torch.int, !torch.int -> !torch.int %int4096_5391 = torch.constant.int 4096 %4683 = torch.prim.ListConstruct %4682, %int4096_5391 : (!torch.int, !torch.int) -> !torch.list<int> %4684 = torch.aten.view %4666, %4683 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4684, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4685 = torch.aten.mm %4684, %4681 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4685, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_5392 = torch.constant.int 4 %int1024_5393 = torch.constant.int 1024 %4686 = torch.prim.ListConstruct %int4_5392, %294, %int1024_5393 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4687 = torch.aten.view %4685, %4686 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4687, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_5394 = torch.constant.int 4 %int32_5395 = torch.constant.int 32 %int128_5396 = torch.constant.int 128 %4688 = torch.prim.ListConstruct %int4_5394, %294, %int32_5395, %int128_5396 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4689 = torch.aten.view %4673, %4688 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4689, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_5397 = torch.constant.int 4 %int8_5398 = torch.constant.int 8 %int128_5399 = torch.constant.int 128 %4690 = torch.prim.ListConstruct %int4_5397, %294, %int8_5398, %int128_5399 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4691 = torch.aten.view %4680, %4690 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> 
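// NOTE (annotation): from here the per-block pattern repeats: q/k/v projections (%200-%202, again 32 query heads vs. 8 KV heads), head reshapes, a fresh rotary table, and the same complex-bitcast RoPE application continuing past the end of this section.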
torch.bind_symbolic_shape %4691, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_5400 = torch.constant.int 4 %int8_5401 = torch.constant.int 8 %int128_5402 = torch.constant.int 128 %4692 = torch.prim.ListConstruct %int4_5400, %294, %int8_5401, %int128_5402 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4693 = torch.aten.view %4687, %4692 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4693, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_5403 = torch.constant.int 131072 %none_5404 = torch.constant.none %none_5405 = torch.constant.none %cpu_5406 = torch.constant.device "cpu" %false_5407 = torch.constant.bool false %4694 = torch.aten.arange %int131072_5403, %none_5404, %none_5405, %cpu_5406, %false_5407 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_5408 = torch.constant.int 0 %int128_5409 = torch.constant.int 128 %int2_5410 = torch.constant.int 2 %none_5411 = torch.constant.none %none_5412 = torch.constant.none %cpu_5413 = torch.constant.device "cpu" %false_5414 = torch.constant.bool false %4695 = torch.aten.arange.start_step %int0_5408, %int128_5409, %int2_5410, %none_5411, %none_5412, %cpu_5413, %false_5414 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_5415 = torch.constant.int 0 %int0_5416 = torch.constant.int 0 %int64_5417 = torch.constant.int 64 %int1_5418 = torch.constant.int 1 %4696 = torch.aten.slice.Tensor %4695, %int0_5415, %int0_5416, %int64_5417, %int1_5418 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_5419 = torch.constant.int 6 %4697 = torch.prims.convert_element_type %4696, %int6_5419 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_5420 = torch.constant.int 128 %4698 = torch.aten.div.Scalar %4697, %int128_5420 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_5421 = torch.constant.float 5.000000e+05 %4699 = torch.aten.pow.Scalar %float5.000000e05_5421, %4698 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4700 = torch.aten.reciprocal %4699 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_5422 = torch.constant.float 1.000000e+00 %4701 = torch.aten.mul.Scalar %4700, %float1.000000e00_5422 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_5423 = torch.constant.int 131072 %int1_5424 = torch.constant.int 1 %4702 = torch.prim.ListConstruct %int131072_5423, %int1_5424 : (!torch.int, !torch.int) -> !torch.list<int> %4703 = torch.aten.view %4694, %4702 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4704 = torch.aten.mul.Tensor %4703, %4701 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4705 = torch.aten.cos %4704 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4706 = torch.aten.sin %4704 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4707 = torch.aten.complex %4705, %4706 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_5425 = torch.constant.int 1 %4708 = torch.aten.size.int %4673, %int1_5425 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_5426 = 
torch.constant.int 0 %4709 = torch.aten.add.int %int0_5426, %4708 : !torch.int, !torch.int -> !torch.int %int0_5427 = torch.constant.int 0 %int0_5428 = torch.constant.int 0 %int1_5429 = torch.constant.int 1 %4710 = torch.aten.slice.Tensor %4707, %int0_5427, %int0_5428, %4709, %int1_5429 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4710, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_5430 = torch.constant.int 1 %int0_5431 = torch.constant.int 0 %int9223372036854775807_5432 = torch.constant.int 9223372036854775807 %int1_5433 = torch.constant.int 1 %4711 = torch.aten.slice.Tensor %4710, %int1_5430, %int0_5431, %int9223372036854775807_5432, %int1_5433 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4711, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_5434 = torch.constant.int 0 %4712 = torch.aten.unsqueeze %4711, %int0_5434 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4712, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_5435 = torch.constant.int 2 %4713 = torch.aten.unsqueeze %4712, %int2_5435 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4713, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_5436 = torch.constant.int 3 %int0_5437 = torch.constant.int 0 %int9223372036854775807_5438 = torch.constant.int 9223372036854775807 %int1_5439 = torch.constant.int 1 %4714 = torch.aten.slice.Tensor %4713, %int3_5436, %int0_5437, %int9223372036854775807_5438, %int1_5439 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4714, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4715 = torch_c.to_builtin_tensor %4689 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_5440 = arith.constant 1 : index %dim_5441 = tensor.dim %4715, %c1_5440 : tensor<4x?x32x128xf16> %4716 = flow.tensor.bitcast %4715 : tensor<4x?x32x128xf16>{%dim_5441} -> tensor<4x?x32x64xcomplex<f16>>{%dim_5441} %4717 = torch_c.from_builtin_tensor %4716 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %4717, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %4718 = torch.aten.mul.Tensor %4717, %4714 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %4718, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %4719 = torch_c.to_builtin_tensor %4718 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_5442 = arith.constant 1 : index %dim_5443 = tensor.dim %4719, %c1_5442 : tensor<4x?x32x64xcomplex<f32>> %4720 = flow.tensor.bitcast %4719 : tensor<4x?x32x64xcomplex<f32>>{%dim_5443} -> tensor<4x?x32x128xf32>{%dim_5443} %4721 = torch_c.from_builtin_tensor %4720 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %4721, [%292], affine_map<()[s0] -> (4, s0 * 32, 
32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_5444 = torch.constant.int 5 %4722 = torch.prims.convert_element_type %4721, %int5_5444 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4722, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_5445 = torch.constant.int 131072 %none_5446 = torch.constant.none %none_5447 = torch.constant.none %cpu_5448 = torch.constant.device "cpu" %false_5449 = torch.constant.bool false %4723 = torch.aten.arange %int131072_5445, %none_5446, %none_5447, %cpu_5448, %false_5449 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_5450 = torch.constant.int 0 %int128_5451 = torch.constant.int 128 %int2_5452 = torch.constant.int 2 %none_5453 = torch.constant.none %none_5454 = torch.constant.none %cpu_5455 = torch.constant.device "cpu" %false_5456 = torch.constant.bool false %4724 = torch.aten.arange.start_step %int0_5450, %int128_5451, %int2_5452, %none_5453, %none_5454, %cpu_5455, %false_5456 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_5457 = torch.constant.int 0 %int0_5458 = torch.constant.int 0 %int64_5459 = torch.constant.int 64 %int1_5460 = torch.constant.int 1 %4725 = torch.aten.slice.Tensor %4724, %int0_5457, %int0_5458, %int64_5459, %int1_5460 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_5461 = torch.constant.int 6 %4726 = torch.prims.convert_element_type %4725, %int6_5461 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_5462 = torch.constant.int 128 %4727 = torch.aten.div.Scalar %4726, %int128_5462 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_5463 = torch.constant.float 5.000000e+05 %4728 = torch.aten.pow.Scalar %float5.000000e05_5463, %4727 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4729 = torch.aten.reciprocal %4728 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_5464 = torch.constant.float 1.000000e+00 %4730 = torch.aten.mul.Scalar %4729, %float1.000000e00_5464 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_5465 = torch.constant.int 131072 %int1_5466 = torch.constant.int 1 %4731 = torch.prim.ListConstruct %int131072_5465, %int1_5466 : (!torch.int, !torch.int) -> !torch.list<int> %4732 = torch.aten.view %4723, %4731 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4733 = torch.aten.mul.Tensor %4732, %4730 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4734 = torch.aten.cos %4733 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4735 = torch.aten.sin %4733 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4736 = torch.aten.complex %4734, %4735 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_5467 = torch.constant.int 1 %4737 = torch.aten.size.int %4680, %int1_5467 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_5468 = torch.constant.int 0 %4738 = torch.aten.add.int %int0_5468, %4737 : !torch.int, !torch.int -> !torch.int %int0_5469 = torch.constant.int 0 %int0_5470 = torch.constant.int 0 %int1_5471 = torch.constant.int 1 %4739 = torch.aten.slice.Tensor %4736, %int0_5469, %int0_5470, %4738, 
%int1_5471 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4739, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_5472 = torch.constant.int 1 %int0_5473 = torch.constant.int 0 %int9223372036854775807_5474 = torch.constant.int 9223372036854775807 %int1_5475 = torch.constant.int 1 %4740 = torch.aten.slice.Tensor %4739, %int1_5472, %int0_5473, %int9223372036854775807_5474, %int1_5475 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4740, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_5476 = torch.constant.int 0 %4741 = torch.aten.unsqueeze %4740, %int0_5476 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4741, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_5477 = torch.constant.int 2 %4742 = torch.aten.unsqueeze %4741, %int2_5477 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4742, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_5478 = torch.constant.int 3 %int0_5479 = torch.constant.int 0 %int9223372036854775807_5480 = torch.constant.int 9223372036854775807 %int1_5481 = torch.constant.int 1 %4743 = torch.aten.slice.Tensor %4742, %int3_5478, %int0_5479, %int9223372036854775807_5480, %int1_5481 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4743, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4744 = torch_c.to_builtin_tensor %4691 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_5482 = arith.constant 1 : index %dim_5483 = tensor.dim %4744, %c1_5482 : tensor<4x?x8x128xf16> %4745 = flow.tensor.bitcast %4744 : tensor<4x?x8x128xf16>{%dim_5483} -> tensor<4x?x8x64xcomplex<f16>>{%dim_5483} %4746 = torch_c.from_builtin_tensor %4745 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %4746, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %4747 = torch.aten.mul.Tensor %4746, %4743 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %4747, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %4748 = torch_c.to_builtin_tensor %4747 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_5484 = arith.constant 1 : index %dim_5485 = tensor.dim %4748, %c1_5484 : tensor<4x?x8x64xcomplex<f32>> %4749 = flow.tensor.bitcast %4748 : tensor<4x?x8x64xcomplex<f32>>{%dim_5485} -> tensor<4x?x8x128xf32>{%dim_5485} %4750 = torch_c.from_builtin_tensor %4749 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %4750, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_5486 = torch.constant.int 5 %4751 = torch.prims.convert_element_type %4750, %int5_5486 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4751, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 
128)> : !torch.vtensor<[4,?,8,128],f16> %int64_5487 = torch.constant.int 64 %4752 = torch.aten.mul.Scalar %arg2, %int64_5487 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4752, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int44 = torch.constant.int 44 %int1_5488 = torch.constant.int 1 %4753 = torch.aten.add.Scalar %4752, %int44, %int1_5488 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4753, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_5489 = torch.constant.int 4 %int32_5490 = torch.constant.int 32 %int8_5491 = torch.constant.int 8 %int128_5492 = torch.constant.int 128 %4754 = torch.prim.ListConstruct %int4_5489, %425, %int32_5490, %int8_5491, %int128_5492 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4755 = torch.aten.view %4751, %4754 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4755, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_5493 = torch.constant.int 4 %4756 = torch.aten.mul.int %int4_5493, %425 : !torch.int, !torch.int -> !torch.int %int32_5494 = torch.constant.int 32 %int8_5495 = torch.constant.int 8 %int128_5496 = torch.constant.int 128 %4757 = torch.prim.ListConstruct %4756, %int32_5494, %int8_5495, %int128_5496 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4758 = torch.aten.view %4755, %4757 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4758, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_5497 = torch.constant.int 4 %4759 = torch.aten.mul.int %int4_5497, %425 : !torch.int, !torch.int -> !torch.int %4760 = torch.prim.ListConstruct %4759 : (!torch.int) -> !torch.list<int> %4761 = torch.aten.view %4753, %4760 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4761, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_5498 = torch.constant.int 32 %int2_5499 = torch.constant.int 2 %int32_5500 = torch.constant.int 32 %int8_5501 = torch.constant.int 8 %int128_5502 = torch.constant.int 128 %4762 = torch.prim.ListConstruct %416, %int32_5498, %int2_5499, %int32_5500, %int8_5501, %int128_5502 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4763 = torch.aten.view %4595, %4762 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4763, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_5503 = torch.constant.int 32 %4764 = torch.aten.mul.int %416, %int32_5503 : !torch.int, !torch.int -> !torch.int %int2_5504 = torch.constant.int 2 %4765 = torch.aten.mul.int %4764, %int2_5504 : !torch.int, !torch.int -> !torch.int %int32_5505 = torch.constant.int 32 %int8_5506 = torch.constant.int 8 %int128_5507 = torch.constant.int 128 %4766 = torch.prim.ListConstruct %4765, %int32_5505, %int8_5506, %int128_5507 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4767 = torch.aten.view %4763, %4766 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4767, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : 
!torch.vtensor<[?,32,8,128],f16> %4768 = torch.prim.ListConstruct %4761 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_5508 = torch.constant.bool false %4769 = torch.aten.index_put %4767, %4768, %4758, %false_5508 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4769, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_5509 = torch.constant.int 32 %int2_5510 = torch.constant.int 2 %int32_5511 = torch.constant.int 32 %int8_5512 = torch.constant.int 8 %int128_5513 = torch.constant.int 128 %4770 = torch.prim.ListConstruct %416, %int32_5509, %int2_5510, %int32_5511, %int8_5512, %int128_5513 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4771 = torch.aten.view %4769, %4770 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4771, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_5514 = torch.constant.int 2097152 %4772 = torch.prim.ListConstruct %416, %int2097152_5514 : (!torch.int, !torch.int) -> !torch.list<int> %4773 = torch.aten.view %4771, %4772 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4773, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_5515 = torch.constant.int 32 %int2_5516 = torch.constant.int 2 %int32_5517 = torch.constant.int 32 %int8_5518 = torch.constant.int 8 %int128_5519 = torch.constant.int 128 %4774 = torch.prim.ListConstruct %416, %int32_5515, %int2_5516, %int32_5517, %int8_5518, %int128_5519 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4775 = torch.aten.view %4773, %4774 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4775, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_5520 = torch.constant.int 32 %int8_5521 = torch.constant.int 8 %int128_5522 = torch.constant.int 128 %4776 = torch.prim.ListConstruct %4765, %int32_5520, %int8_5521, %int128_5522 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4777 = torch.aten.view %4775, %4776 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4777, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_5523 = torch.constant.int 4 %int32_5524 = torch.constant.int 32 %int8_5525 = torch.constant.int 8 %int128_5526 = torch.constant.int 128 %4778 = torch.prim.ListConstruct %int4_5523, %425, %int32_5524, %int8_5525, %int128_5526 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4779 = torch.aten.view %4693, %4778 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4779, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_5527 = torch.constant.int 4 %4780 = torch.aten.mul.int %int4_5527, %425 : !torch.int, !torch.int -> !torch.int %int32_5528 = torch.constant.int 32 %int8_5529 = torch.constant.int 8 %int128_5530 = torch.constant.int 128 %4781 = torch.prim.ListConstruct %4780, %int32_5528, %int8_5529, %int128_5530 : (!torch.int, 
!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4782 = torch.aten.view %4779, %4781 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4782, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_5531 = torch.constant.int 1 %int1_5532 = torch.constant.int 1 %4783 = torch.aten.add.Scalar %4753, %int1_5531, %int1_5532 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4783, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_5533 = torch.constant.int 4 %4784 = torch.aten.mul.int %int4_5533, %425 : !torch.int, !torch.int -> !torch.int %4785 = torch.prim.ListConstruct %4784 : (!torch.int) -> !torch.list<int> %4786 = torch.aten.view %4783, %4785 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4786, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %4787 = torch.prim.ListConstruct %4786 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_5534 = torch.constant.bool false %4788 = torch.aten.index_put %4777, %4787, %4782, %false_5534 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4788, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_5535 = torch.constant.int 32 %int2_5536 = torch.constant.int 2 %int32_5537 = torch.constant.int 32 %int8_5538 = torch.constant.int 8 %int128_5539 = torch.constant.int 128 %4789 = torch.prim.ListConstruct %416, %int32_5535, %int2_5536, %int32_5537, %int8_5538, %int128_5539 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4790 = torch.aten.view %4788, %4789 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4790, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_5540 = torch.constant.int 2097152 %4791 = torch.prim.ListConstruct %416, %int2097152_5540 : (!torch.int, !torch.int) -> !torch.list<int> %4792 = torch.aten.view %4790, %4791 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4792, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_5541 = torch.constant.int -2 %4793 = torch.aten.unsqueeze %4751, %int-2_5541 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4793, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_5542 = torch.constant.int 4 %int8_5543 = torch.constant.int 8 %int4_5544 = torch.constant.int 4 %int128_5545 = torch.constant.int 128 %4794 = torch.prim.ListConstruct %int4_5542, %4737, %int8_5543, %int4_5544, %int128_5545 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_5546 = torch.constant.bool false %4795 = torch.aten.expand %4793, %4794, %false_5546 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4795, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_5547 = torch.constant.int 0 %4796 = torch.aten.clone %4795, %int0_5547 : !torch.vtensor<[4,?,8,4,128],f16>, 
!torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4796, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_5548 = torch.constant.int 4 %int32_5549 = torch.constant.int 32 %int128_5550 = torch.constant.int 128 %4797 = torch.prim.ListConstruct %int4_5548, %4737, %int32_5549, %int128_5550 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4798 = torch.aten._unsafe_view %4796, %4797 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4798, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_5551 = torch.constant.int -2 %4799 = torch.aten.unsqueeze %4693, %int-2_5551 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4799, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_5552 = torch.constant.int 1 %4800 = torch.aten.size.int %4687, %int1_5552 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_5553 = torch.constant.int 4 %int8_5554 = torch.constant.int 8 %int4_5555 = torch.constant.int 4 %int128_5556 = torch.constant.int 128 %4801 = torch.prim.ListConstruct %int4_5553, %4800, %int8_5554, %int4_5555, %int128_5556 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_5557 = torch.constant.bool false %4802 = torch.aten.expand %4799, %4801, %false_5557 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4802, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_5558 = torch.constant.int 0 %4803 = torch.aten.clone %4802, %int0_5558 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4803, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_5559 = torch.constant.int 4 %int32_5560 = torch.constant.int 32 %int128_5561 = torch.constant.int 128 %4804 = torch.prim.ListConstruct %int4_5559, %4800, %int32_5560, %int128_5561 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4805 = torch.aten._unsafe_view %4803, %4804 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4805, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_5562 = torch.constant.int 1 %int2_5563 = torch.constant.int 2 %4806 = torch.aten.transpose.int %4722, %int1_5562, %int2_5563 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4806, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5564 = torch.constant.int 1 %int2_5565 = torch.constant.int 2 %4807 = torch.aten.transpose.int %4798, %int1_5564, %int2_5565 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4807, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5566 = torch.constant.int 1 %int2_5567 = torch.constant.int 2 %4808 = torch.aten.transpose.int %4805, %int1_5566, %int2_5567 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %4808, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : 
!torch.vtensor<[4,32,?,128],f16> %float0.000000e00_5568 = torch.constant.float 0.000000e+00 %false_5569 = torch.constant.bool false %none_5570 = torch.constant.none %4809:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%4806, %4807, %4808, %float0.000000e00_5568, %false_5569, %320, %none_5570) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %4809#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5571 = torch.constant.int 1 %int2_5572 = torch.constant.int 2 %4810 = torch.aten.transpose.int %4809#0, %int1_5571, %int2_5572 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4810, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_5573 = torch.constant.int 4 %int4096_5574 = torch.constant.int 4096 %4811 = torch.prim.ListConstruct %int4_5573, %4708, %int4096_5574 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4812 = torch.aten.view %4810, %4811 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4812, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5575 = torch.constant.int -2 %int-1_5576 = torch.constant.int -1 %4813 = torch.aten.transpose.int %203, %int-2_5575, %int-1_5576 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_5577 = torch.constant.int 4 %4814 = torch.aten.mul.int %int4_5577, %4708 : !torch.int, !torch.int -> !torch.int %int4096_5578 = torch.constant.int 4096 %4815 = torch.prim.ListConstruct %4814, %int4096_5578 : (!torch.int, !torch.int) -> !torch.list<int> %4816 = torch.aten.view %4812, %4815 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4816, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4817 = torch.aten.mm %4816, %4813 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4817, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5579 = torch.constant.int 4 %int4096_5580 = torch.constant.int 4096 %4818 = torch.prim.ListConstruct %int4_5579, %4708, %int4096_5580 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4819 = torch.aten.view %4817, %4818 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4819, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_5581 = torch.constant.int 1 %4820 = torch.aten.add.Tensor %4657, %4819, %int1_5581 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4820, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_5582 = torch.constant.int 6 %4821 = torch.prims.convert_element_type %4820, %int6_5582 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4821, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_5583 = torch.constant.int 2 %4822 = torch.aten.pow.Tensor_Scalar %4821, 
%int2_5583 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4822, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_5584 = torch.constant.int -1 %4823 = torch.prim.ListConstruct %int-1_5584 : (!torch.int) -> !torch.list<int> %true_5585 = torch.constant.bool true %none_5586 = torch.constant.none %4824 = torch.aten.mean.dim %4822, %4823, %true_5585, %none_5586 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4824, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_5587 = torch.constant.float 9.9999997473787516E-6 %int1_5588 = torch.constant.int 1 %4825 = torch.aten.add.Scalar %4824, %float9.999990e-06_5587, %int1_5588 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4825, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4826 = torch.aten.rsqrt %4825 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4826, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4827 = torch.aten.mul.Tensor %4821, %4826 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4827, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4828 = torch.aten.mul.Tensor %204, %4827 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4828, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_5589 = torch.constant.int 5 %4829 = torch.prims.convert_element_type %4828, %int5_5589 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4829, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5590 = torch.constant.int -2 %int-1_5591 = torch.constant.int -1 %4830 = torch.aten.transpose.int %205, %int-2_5590, %int-1_5591 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_5592 = torch.constant.int 4 %4831 = torch.aten.mul.int %int4_5592, %294 : !torch.int, !torch.int -> !torch.int %int4096_5593 = torch.constant.int 4096 %4832 = torch.prim.ListConstruct %4831, %int4096_5593 : (!torch.int, !torch.int) -> !torch.list<int> %4833 = torch.aten.view %4829, %4832 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4833, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4834 = torch.aten.mm %4833, %4830 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4834, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_5594 = torch.constant.int 4 %int14336_5595 = torch.constant.int 14336 %4835 = torch.prim.ListConstruct %int4_5594, %294, %int14336_5595 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4836 = torch.aten.view %4834, %4835 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4836, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4837 = torch.aten.silu %4836 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> 
torch.bind_symbolic_shape %4837, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_5596 = torch.constant.int -2 %int-1_5597 = torch.constant.int -1 %4838 = torch.aten.transpose.int %206, %int-2_5596, %int-1_5597 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_5598 = torch.constant.int 4 %4839 = torch.aten.mul.int %int4_5598, %294 : !torch.int, !torch.int -> !torch.int %int4096_5599 = torch.constant.int 4096 %4840 = torch.prim.ListConstruct %4839, %int4096_5599 : (!torch.int, !torch.int) -> !torch.list<int> %4841 = torch.aten.view %4829, %4840 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4841, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4842 = torch.aten.mm %4841, %4838 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4842, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_5600 = torch.constant.int 4 %int14336_5601 = torch.constant.int 14336 %4843 = torch.prim.ListConstruct %int4_5600, %294, %int14336_5601 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4844 = torch.aten.view %4842, %4843 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4844, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %4845 = torch.aten.mul.Tensor %4837, %4844 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %4845, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_5602 = torch.constant.int -2 %int-1_5603 = torch.constant.int -1 %4846 = torch.aten.transpose.int %207, %int-2_5602, %int-1_5603 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_5604 = torch.constant.int 1 %4847 = torch.aten.size.int %4836, %int1_5604 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_5605 = torch.constant.int 4 %4848 = torch.aten.mul.int %int4_5605, %4847 : !torch.int, !torch.int -> !torch.int %int14336_5606 = torch.constant.int 14336 %4849 = torch.prim.ListConstruct %4848, %int14336_5606 : (!torch.int, !torch.int) -> !torch.list<int> %4850 = torch.aten.view %4845, %4849 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %4850, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %4851 = torch.aten.mm %4850, %4846 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4851, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5607 = torch.constant.int 4 %int4096_5608 = torch.constant.int 4096 %4852 = torch.prim.ListConstruct %int4_5607, %4847, %int4096_5608 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4853 = torch.aten.view %4851, %4852 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4853, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_5609 = torch.constant.int 1 %4854 = torch.aten.add.Tensor %4820, %4853, %int1_5609 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> 
torch.bind_symbolic_shape %4854, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_5610 = torch.constant.int 6 %4855 = torch.prims.convert_element_type %4854, %int6_5610 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4855, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_5611 = torch.constant.int 2 %4856 = torch.aten.pow.Tensor_Scalar %4855, %int2_5611 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4856, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_5612 = torch.constant.int -1 %4857 = torch.prim.ListConstruct %int-1_5612 : (!torch.int) -> !torch.list<int> %true_5613 = torch.constant.bool true %none_5614 = torch.constant.none %4858 = torch.aten.mean.dim %4856, %4857, %true_5613, %none_5614 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4858, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_5615 = torch.constant.float 9.9999997473787516E-6 %int1_5616 = torch.constant.int 1 %4859 = torch.aten.add.Scalar %4858, %float9.999990e-06_5615, %int1_5616 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4859, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4860 = torch.aten.rsqrt %4859 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %4860, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %4861 = torch.aten.mul.Tensor %4855, %4860 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4861, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %4862 = torch.aten.mul.Tensor %208, %4861 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %4862, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_5617 = torch.constant.int 5 %4863 = torch.prims.convert_element_type %4862, %int5_5617 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4863, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5618 = torch.constant.int -2 %int-1_5619 = torch.constant.int -1 %4864 = torch.aten.transpose.int %209, %int-2_5618, %int-1_5619 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_5620 = torch.constant.int 4 %4865 = torch.aten.mul.int %int4_5620, %294 : !torch.int, !torch.int -> !torch.int %int4096_5621 = torch.constant.int 4096 %4866 = torch.prim.ListConstruct %4865, %int4096_5621 : (!torch.int, !torch.int) -> !torch.list<int> %4867 = torch.aten.view %4863, %4866 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4867, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4868 = torch.aten.mm %4867, %4864 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4868, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5622 = torch.constant.int 4 %int4096_5623 = torch.constant.int 4096 
%4869 = torch.prim.ListConstruct %int4_5622, %294, %int4096_5623 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4870 = torch.aten.view %4868, %4869 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %4870, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5624 = torch.constant.int -2 %int-1_5625 = torch.constant.int -1 %4871 = torch.aten.transpose.int %210, %int-2_5624, %int-1_5625 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_5626 = torch.constant.int 4 %4872 = torch.aten.mul.int %int4_5626, %294 : !torch.int, !torch.int -> !torch.int %int4096_5627 = torch.constant.int 4096 %4873 = torch.prim.ListConstruct %4872, %int4096_5627 : (!torch.int, !torch.int) -> !torch.list<int> %4874 = torch.aten.view %4863, %4873 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4874, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4875 = torch.aten.mm %4874, %4871 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4875, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_5628 = torch.constant.int 4 %int1024_5629 = torch.constant.int 1024 %4876 = torch.prim.ListConstruct %int4_5628, %294, %int1024_5629 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4877 = torch.aten.view %4875, %4876 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4877, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_5630 = torch.constant.int -2 %int-1_5631 = torch.constant.int -1 %4878 = torch.aten.transpose.int %211, %int-2_5630, %int-1_5631 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_5632 = torch.constant.int 4 %4879 = torch.aten.mul.int %int4_5632, %294 : !torch.int, !torch.int -> !torch.int %int4096_5633 = torch.constant.int 4096 %4880 = torch.prim.ListConstruct %4879, %int4096_5633 : (!torch.int, !torch.int) -> !torch.list<int> %4881 = torch.aten.view %4863, %4880 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %4881, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %4882 = torch.aten.mm %4881, %4878 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %4882, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_5634 = torch.constant.int 4 %int1024_5635 = torch.constant.int 1024 %4883 = torch.prim.ListConstruct %int4_5634, %294, %int1024_5635 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %4884 = torch.aten.view %4882, %4883 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %4884, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_5636 = torch.constant.int 4 %int32_5637 = torch.constant.int 32 %int128_5638 = torch.constant.int 128 %4885 = torch.prim.ListConstruct %int4_5636, %294, %int32_5637, %int128_5638 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4886 = torch.aten.view %4870, %4885 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape 
%4886, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_5639 = torch.constant.int 4 %int8_5640 = torch.constant.int 8 %int128_5641 = torch.constant.int 128 %4887 = torch.prim.ListConstruct %int4_5639, %294, %int8_5640, %int128_5641 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4888 = torch.aten.view %4877, %4887 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4888, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_5642 = torch.constant.int 4 %int8_5643 = torch.constant.int 8 %int128_5644 = torch.constant.int 128 %4889 = torch.prim.ListConstruct %int4_5642, %294, %int8_5643, %int128_5644 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4890 = torch.aten.view %4884, %4889 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4890, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_5645 = torch.constant.int 131072 %none_5646 = torch.constant.none %none_5647 = torch.constant.none %cpu_5648 = torch.constant.device "cpu" %false_5649 = torch.constant.bool false %4891 = torch.aten.arange %int131072_5645, %none_5646, %none_5647, %cpu_5648, %false_5649 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_5650 = torch.constant.int 0 %int128_5651 = torch.constant.int 128 %int2_5652 = torch.constant.int 2 %none_5653 = torch.constant.none %none_5654 = torch.constant.none %cpu_5655 = torch.constant.device "cpu" %false_5656 = torch.constant.bool false %4892 = torch.aten.arange.start_step %int0_5650, %int128_5651, %int2_5652, %none_5653, %none_5654, %cpu_5655, %false_5656 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_5657 = torch.constant.int 0 %int0_5658 = torch.constant.int 0 %int64_5659 = torch.constant.int 64 %int1_5660 = torch.constant.int 1 %4893 = torch.aten.slice.Tensor %4892, %int0_5657, %int0_5658, %int64_5659, %int1_5660 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_5661 = torch.constant.int 6 %4894 = torch.prims.convert_element_type %4893, %int6_5661 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_5662 = torch.constant.int 128 %4895 = torch.aten.div.Scalar %4894, %int128_5662 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_5663 = torch.constant.float 5.000000e+05 %4896 = torch.aten.pow.Scalar %float5.000000e05_5663, %4895 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4897 = torch.aten.reciprocal %4896 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_5664 = torch.constant.float 1.000000e+00 %4898 = torch.aten.mul.Scalar %4897, %float1.000000e00_5664 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_5665 = torch.constant.int 131072 %int1_5666 = torch.constant.int 1 %4899 = torch.prim.ListConstruct %int131072_5665, %int1_5666 : (!torch.int, !torch.int) -> !torch.list<int> %4900 = torch.aten.view %4891, %4899 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4901 = torch.aten.mul.Tensor %4900, %4898 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4902 = torch.aten.cos %4901 : 
!torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4903 = torch.aten.sin %4901 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4904 = torch.aten.complex %4902, %4903 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_5667 = torch.constant.int 1 %4905 = torch.aten.size.int %4870, %int1_5667 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_5668 = torch.constant.int 0 %4906 = torch.aten.add.int %int0_5668, %4905 : !torch.int, !torch.int -> !torch.int %int0_5669 = torch.constant.int 0 %int0_5670 = torch.constant.int 0 %int1_5671 = torch.constant.int 1 %4907 = torch.aten.slice.Tensor %4904, %int0_5669, %int0_5670, %4906, %int1_5671 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4907, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_5672 = torch.constant.int 1 %int0_5673 = torch.constant.int 0 %int9223372036854775807_5674 = torch.constant.int 9223372036854775807 %int1_5675 = torch.constant.int 1 %4908 = torch.aten.slice.Tensor %4907, %int1_5672, %int0_5673, %int9223372036854775807_5674, %int1_5675 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4908, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_5676 = torch.constant.int 0 %4909 = torch.aten.unsqueeze %4908, %int0_5676 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4909, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_5677 = torch.constant.int 2 %4910 = torch.aten.unsqueeze %4909, %int2_5677 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4910, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_5678 = torch.constant.int 3 %int0_5679 = torch.constant.int 0 %int9223372036854775807_5680 = torch.constant.int 9223372036854775807 %int1_5681 = torch.constant.int 1 %4911 = torch.aten.slice.Tensor %4910, %int3_5678, %int0_5679, %int9223372036854775807_5680, %int1_5681 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4911, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4912 = torch_c.to_builtin_tensor %4886 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_5682 = arith.constant 1 : index %dim_5683 = tensor.dim %4912, %c1_5682 : tensor<4x?x32x128xf16> %4913 = flow.tensor.bitcast %4912 : tensor<4x?x32x128xf16>{%dim_5683} -> tensor<4x?x32x64xcomplex<f16>>{%dim_5683} %4914 = torch_c.from_builtin_tensor %4913 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %4914, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %4915 = torch.aten.mul.Tensor %4914, %4911 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %4915, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %4916 = torch_c.to_builtin_tensor %4915 : 
!torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_5684 = arith.constant 1 : index %dim_5685 = tensor.dim %4916, %c1_5684 : tensor<4x?x32x64xcomplex<f32>> %4917 = flow.tensor.bitcast %4916 : tensor<4x?x32x64xcomplex<f32>>{%dim_5685} -> tensor<4x?x32x128xf32>{%dim_5685} %4918 = torch_c.from_builtin_tensor %4917 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %4918, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_5686 = torch.constant.int 5 %4919 = torch.prims.convert_element_type %4918, %int5_5686 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4919, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_5687 = torch.constant.int 131072 %none_5688 = torch.constant.none %none_5689 = torch.constant.none %cpu_5690 = torch.constant.device "cpu" %false_5691 = torch.constant.bool false %4920 = torch.aten.arange %int131072_5687, %none_5688, %none_5689, %cpu_5690, %false_5691 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_5692 = torch.constant.int 0 %int128_5693 = torch.constant.int 128 %int2_5694 = torch.constant.int 2 %none_5695 = torch.constant.none %none_5696 = torch.constant.none %cpu_5697 = torch.constant.device "cpu" %false_5698 = torch.constant.bool false %4921 = torch.aten.arange.start_step %int0_5692, %int128_5693, %int2_5694, %none_5695, %none_5696, %cpu_5697, %false_5698 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_5699 = torch.constant.int 0 %int0_5700 = torch.constant.int 0 %int64_5701 = torch.constant.int 64 %int1_5702 = torch.constant.int 1 %4922 = torch.aten.slice.Tensor %4921, %int0_5699, %int0_5700, %int64_5701, %int1_5702 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_5703 = torch.constant.int 6 %4923 = torch.prims.convert_element_type %4922, %int6_5703 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_5704 = torch.constant.int 128 %4924 = torch.aten.div.Scalar %4923, %int128_5704 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_5705 = torch.constant.float 5.000000e+05 %4925 = torch.aten.pow.Scalar %float5.000000e05_5705, %4924 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %4926 = torch.aten.reciprocal %4925 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_5706 = torch.constant.float 1.000000e+00 %4927 = torch.aten.mul.Scalar %4926, %float1.000000e00_5706 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_5707 = torch.constant.int 131072 %int1_5708 = torch.constant.int 1 %4928 = torch.prim.ListConstruct %int131072_5707, %int1_5708 : (!torch.int, !torch.int) -> !torch.list<int> %4929 = torch.aten.view %4920, %4928 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %4930 = torch.aten.mul.Tensor %4929, %4927 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %4931 = torch.aten.cos %4930 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4932 = torch.aten.sin %4930 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %4933 = torch.aten.complex %4931, %4932 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> 
-> !torch.vtensor<[131072,64],complex<f32>> %int1_5709 = torch.constant.int 1 %4934 = torch.aten.size.int %4877, %int1_5709 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_5710 = torch.constant.int 0 %4935 = torch.aten.add.int %int0_5710, %4934 : !torch.int, !torch.int -> !torch.int %int0_5711 = torch.constant.int 0 %int0_5712 = torch.constant.int 0 %int1_5713 = torch.constant.int 1 %4936 = torch.aten.slice.Tensor %4933, %int0_5711, %int0_5712, %4935, %int1_5713 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4936, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_5714 = torch.constant.int 1 %int0_5715 = torch.constant.int 0 %int9223372036854775807_5716 = torch.constant.int 9223372036854775807 %int1_5717 = torch.constant.int 1 %4937 = torch.aten.slice.Tensor %4936, %int1_5714, %int0_5715, %int9223372036854775807_5716, %int1_5717 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %4937, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_5718 = torch.constant.int 0 %4938 = torch.aten.unsqueeze %4937, %int0_5718 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %4938, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_5719 = torch.constant.int 2 %4939 = torch.aten.unsqueeze %4938, %int2_5719 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4939, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_5720 = torch.constant.int 3 %int0_5721 = torch.constant.int 0 %int9223372036854775807_5722 = torch.constant.int 9223372036854775807 %int1_5723 = torch.constant.int 1 %4940 = torch.aten.slice.Tensor %4939, %int3_5720, %int0_5721, %int9223372036854775807_5722, %int1_5723 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %4940, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %4941 = torch_c.to_builtin_tensor %4888 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_5724 = arith.constant 1 : index %dim_5725 = tensor.dim %4941, %c1_5724 : tensor<4x?x8x128xf16> %4942 = flow.tensor.bitcast %4941 : tensor<4x?x8x128xf16>{%dim_5725} -> tensor<4x?x8x64xcomplex<f16>>{%dim_5725} %4943 = torch_c.from_builtin_tensor %4942 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %4943, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %4944 = torch.aten.mul.Tensor %4943, %4940 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %4944, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %4945 = torch_c.to_builtin_tensor %4944 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_5726 = arith.constant 1 : index %dim_5727 = tensor.dim %4945, %c1_5726 : tensor<4x?x8x64xcomplex<f32>> %4946 = flow.tensor.bitcast %4945 : tensor<4x?x8x64xcomplex<f32>>{%dim_5727} -> tensor<4x?x8x128xf32>{%dim_5727} 
%4947 = torch_c.from_builtin_tensor %4946 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %4947, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_5728 = torch.constant.int 5 %4948 = torch.prims.convert_element_type %4947, %int5_5728 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %4948, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_5729 = torch.constant.int 64 %4949 = torch.aten.mul.Scalar %arg2, %int64_5729 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4949, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int46 = torch.constant.int 46 %int1_5730 = torch.constant.int 1 %4950 = torch.aten.add.Scalar %4949, %int46, %int1_5730 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4950, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_5731 = torch.constant.int 4 %int32_5732 = torch.constant.int 32 %int8_5733 = torch.constant.int 8 %int128_5734 = torch.constant.int 128 %4951 = torch.prim.ListConstruct %int4_5731, %425, %int32_5732, %int8_5733, %int128_5734 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4952 = torch.aten.view %4948, %4951 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4952, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_5735 = torch.constant.int 4 %4953 = torch.aten.mul.int %int4_5735, %425 : !torch.int, !torch.int -> !torch.int %int32_5736 = torch.constant.int 32 %int8_5737 = torch.constant.int 8 %int128_5738 = torch.constant.int 128 %4954 = torch.prim.ListConstruct %4953, %int32_5736, %int8_5737, %int128_5738 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4955 = torch.aten.view %4952, %4954 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4955, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_5739 = torch.constant.int 4 %4956 = torch.aten.mul.int %int4_5739, %425 : !torch.int, !torch.int -> !torch.int %4957 = torch.prim.ListConstruct %4956 : (!torch.int) -> !torch.list<int> %4958 = torch.aten.view %4950, %4957 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4958, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_5740 = torch.constant.int 32 %int2_5741 = torch.constant.int 2 %int32_5742 = torch.constant.int 32 %int8_5743 = torch.constant.int 8 %int128_5744 = torch.constant.int 128 %4959 = torch.prim.ListConstruct %416, %int32_5740, %int2_5741, %int32_5742, %int8_5743, %int128_5744 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4960 = torch.aten.view %4792, %4959 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4960, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_5745 = torch.constant.int 32 %4961 = torch.aten.mul.int %416, %int32_5745 : !torch.int, !torch.int -> !torch.int %int2_5746 = torch.constant.int 2 %4962 = torch.aten.mul.int %4961, %int2_5746 : !torch.int, !torch.int -> !torch.int %int32_5747 = 
torch.constant.int 32 %int8_5748 = torch.constant.int 8 %int128_5749 = torch.constant.int 128 %4963 = torch.prim.ListConstruct %4962, %int32_5747, %int8_5748, %int128_5749 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4964 = torch.aten.view %4960, %4963 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4964, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %4965 = torch.prim.ListConstruct %4958 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_5750 = torch.constant.bool false %4966 = torch.aten.index_put %4964, %4965, %4955, %false_5750 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4966, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_5751 = torch.constant.int 32 %int2_5752 = torch.constant.int 2 %int32_5753 = torch.constant.int 32 %int8_5754 = torch.constant.int 8 %int128_5755 = torch.constant.int 128 %4967 = torch.prim.ListConstruct %416, %int32_5751, %int2_5752, %int32_5753, %int8_5754, %int128_5755 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4968 = torch.aten.view %4966, %4967 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4968, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_5756 = torch.constant.int 2097152 %4969 = torch.prim.ListConstruct %416, %int2097152_5756 : (!torch.int, !torch.int) -> !torch.list<int> %4970 = torch.aten.view %4968, %4969 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4970, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_5757 = torch.constant.int 32 %int2_5758 = torch.constant.int 2 %int32_5759 = torch.constant.int 32 %int8_5760 = torch.constant.int 8 %int128_5761 = torch.constant.int 128 %4971 = torch.prim.ListConstruct %416, %int32_5757, %int2_5758, %int32_5759, %int8_5760, %int128_5761 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4972 = torch.aten.view %4970, %4971 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4972, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_5762 = torch.constant.int 32 %int8_5763 = torch.constant.int 8 %int128_5764 = torch.constant.int 128 %4973 = torch.prim.ListConstruct %4962, %int32_5762, %int8_5763, %int128_5764 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4974 = torch.aten.view %4972, %4973 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4974, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_5765 = torch.constant.int 4 %int32_5766 = torch.constant.int 32 %int8_5767 = torch.constant.int 8 %int128_5768 = torch.constant.int 128 %4975 = torch.prim.ListConstruct %int4_5765, %425, %int32_5766, %int8_5767, %int128_5768 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4976 = torch.aten.view %4890, %4975 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> 
!torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %4976, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_5769 = torch.constant.int 4 %4977 = torch.aten.mul.int %int4_5769, %425 : !torch.int, !torch.int -> !torch.int %int32_5770 = torch.constant.int 32 %int8_5771 = torch.constant.int 8 %int128_5772 = torch.constant.int 128 %4978 = torch.prim.ListConstruct %4977, %int32_5770, %int8_5771, %int128_5772 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4979 = torch.aten.view %4976, %4978 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4979, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_5773 = torch.constant.int 1 %int1_5774 = torch.constant.int 1 %4980 = torch.aten.add.Scalar %4950, %int1_5773, %int1_5774 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %4980, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_5775 = torch.constant.int 4 %4981 = torch.aten.mul.int %int4_5775, %425 : !torch.int, !torch.int -> !torch.int %4982 = torch.prim.ListConstruct %4981 : (!torch.int) -> !torch.list<int> %4983 = torch.aten.view %4980, %4982 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %4983, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %4984 = torch.prim.ListConstruct %4983 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_5776 = torch.constant.bool false %4985 = torch.aten.index_put %4974, %4984, %4979, %false_5776 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %4985, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_5777 = torch.constant.int 32 %int2_5778 = torch.constant.int 2 %int32_5779 = torch.constant.int 32 %int8_5780 = torch.constant.int 8 %int128_5781 = torch.constant.int 128 %4986 = torch.prim.ListConstruct %416, %int32_5777, %int2_5778, %int32_5779, %int8_5780, %int128_5781 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4987 = torch.aten.view %4985, %4986 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %4987, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_5782 = torch.constant.int 2097152 %4988 = torch.prim.ListConstruct %416, %int2097152_5782 : (!torch.int, !torch.int) -> !torch.list<int> %4989 = torch.aten.view %4987, %4988 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %4989, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_5783 = torch.constant.int -2 %4990 = torch.aten.unsqueeze %4948, %int-2_5783 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4990, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_5784 = torch.constant.int 4 %int8_5785 = torch.constant.int 8 %int4_5786 = torch.constant.int 4 %int128_5787 = torch.constant.int 128 %4991 = torch.prim.ListConstruct %int4_5784, %4934, %int8_5785, %int4_5786, %int128_5787 : (!torch.int, !torch.int, !torch.int, 
!torch.int, !torch.int) -> !torch.list<int> %false_5788 = torch.constant.bool false %4992 = torch.aten.expand %4990, %4991, %false_5788 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4992, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_5789 = torch.constant.int 0 %4993 = torch.aten.clone %4992, %int0_5789 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4993, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_5790 = torch.constant.int 4 %int32_5791 = torch.constant.int 32 %int128_5792 = torch.constant.int 128 %4994 = torch.prim.ListConstruct %int4_5790, %4934, %int32_5791, %int128_5792 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %4995 = torch.aten._unsafe_view %4993, %4994 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %4995, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_5793 = torch.constant.int -2 %4996 = torch.aten.unsqueeze %4890, %int-2_5793 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %4996, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_5794 = torch.constant.int 1 %4997 = torch.aten.size.int %4884, %int1_5794 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_5795 = torch.constant.int 4 %int8_5796 = torch.constant.int 8 %int4_5797 = torch.constant.int 4 %int128_5798 = torch.constant.int 128 %4998 = torch.prim.ListConstruct %int4_5795, %4997, %int8_5796, %int4_5797, %int128_5798 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_5799 = torch.constant.bool false %4999 = torch.aten.expand %4996, %4998, %false_5799 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %4999, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_5800 = torch.constant.int 0 %5000 = torch.aten.clone %4999, %int0_5800 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5000, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_5801 = torch.constant.int 4 %int32_5802 = torch.constant.int 32 %int128_5803 = torch.constant.int 128 %5001 = torch.prim.ListConstruct %int4_5801, %4997, %int32_5802, %int128_5803 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5002 = torch.aten._unsafe_view %5000, %5001 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5002, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_5804 = torch.constant.int 1 %int2_5805 = torch.constant.int 2 %5003 = torch.aten.transpose.int %4919, %int1_5804, %int2_5805 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5003, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5806 = torch.constant.int 1 %int2_5807 = torch.constant.int 2 %5004 = torch.aten.transpose.int %4995, %int1_5806, %int2_5807 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int 
-> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5004, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5808 = torch.constant.int 1 %int2_5809 = torch.constant.int 2 %5005 = torch.aten.transpose.int %5002, %int1_5808, %int2_5809 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5005, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_5810 = torch.constant.float 0.000000e+00 %false_5811 = torch.constant.bool false %none_5812 = torch.constant.none %5006:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5003, %5004, %5005, %float0.000000e00_5810, %false_5811, %320, %none_5812) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %5006#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_5813 = torch.constant.int 1 %int2_5814 = torch.constant.int 2 %5007 = torch.aten.transpose.int %5006#0, %int1_5813, %int2_5814 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5007, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_5815 = torch.constant.int 4 %int4096_5816 = torch.constant.int 4096 %5008 = torch.prim.ListConstruct %int4_5815, %4905, %int4096_5816 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5009 = torch.aten.view %5007, %5008 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5009, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5817 = torch.constant.int -2 %int-1_5818 = torch.constant.int -1 %5010 = torch.aten.transpose.int %212, %int-2_5817, %int-1_5818 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_5819 = torch.constant.int 4 %5011 = torch.aten.mul.int %int4_5819, %4905 : !torch.int, !torch.int -> !torch.int %int4096_5820 = torch.constant.int 4096 %5012 = torch.prim.ListConstruct %5011, %int4096_5820 : (!torch.int, !torch.int) -> !torch.list<int> %5013 = torch.aten.view %5009, %5012 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5013, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5014 = torch.aten.mm %5013, %5010 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5014, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5821 = torch.constant.int 4 %int4096_5822 = torch.constant.int 4096 %5015 = torch.prim.ListConstruct %int4_5821, %4905, %int4096_5822 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5016 = torch.aten.view %5014, %5015 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5016, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_5823 = torch.constant.int 1 %5017 = torch.aten.add.Tensor %4854, %5016, %int1_5823 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> 
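// Grouped-query attention: %4992..%4995 and %4999..%5002 expand the 8 KV heads by a
// factor of 4 to match the 32 query heads, %5006 invokes the CPU flash-attention
// kernel with the [4,1,?,?] additive mask %320, and %5014/%5017 apply the output
// projection (transpose of %212, presumably this block's attn_output.weight) and add
// the result back onto the residual stream.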
torch.bind_symbolic_shape %5017, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_5824 = torch.constant.int 6 %5018 = torch.prims.convert_element_type %5017, %int6_5824 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5018, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_5825 = torch.constant.int 2 %5019 = torch.aten.pow.Tensor_Scalar %5018, %int2_5825 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5019, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_5826 = torch.constant.int -1 %5020 = torch.prim.ListConstruct %int-1_5826 : (!torch.int) -> !torch.list<int> %true_5827 = torch.constant.bool true %none_5828 = torch.constant.none %5021 = torch.aten.mean.dim %5019, %5020, %true_5827, %none_5828 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5021, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_5829 = torch.constant.float 9.9999997473787516E-6 %int1_5830 = torch.constant.int 1 %5022 = torch.aten.add.Scalar %5021, %float9.999990e-06_5829, %int1_5830 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5022, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5023 = torch.aten.rsqrt %5022 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5023, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5024 = torch.aten.mul.Tensor %5018, %5023 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5024, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5025 = torch.aten.mul.Tensor %213, %5024 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5025, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_5831 = torch.constant.int 5 %5026 = torch.prims.convert_element_type %5025, %int5_5831 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5026, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5832 = torch.constant.int -2 %int-1_5833 = torch.constant.int -1 %5027 = torch.aten.transpose.int %214, %int-2_5832, %int-1_5833 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_5834 = torch.constant.int 4 %5028 = torch.aten.mul.int %int4_5834, %294 : !torch.int, !torch.int -> !torch.int %int4096_5835 = torch.constant.int 4096 %5029 = torch.prim.ListConstruct %5028, %int4096_5835 : (!torch.int, !torch.int) -> !torch.list<int> %5030 = torch.aten.view %5026, %5029 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5030, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5031 = torch.aten.mm %5030, %5027 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5031, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_5836 = torch.constant.int 4 %int14336_5837 = 
torch.constant.int 14336 %5032 = torch.prim.ListConstruct %int4_5836, %294, %int14336_5837 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5033 = torch.aten.view %5031, %5032 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5033, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5034 = torch.aten.silu %5033 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5034, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_5838 = torch.constant.int -2 %int-1_5839 = torch.constant.int -1 %5035 = torch.aten.transpose.int %215, %int-2_5838, %int-1_5839 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_5840 = torch.constant.int 4 %5036 = torch.aten.mul.int %int4_5840, %294 : !torch.int, !torch.int -> !torch.int %int4096_5841 = torch.constant.int 4096 %5037 = torch.prim.ListConstruct %5036, %int4096_5841 : (!torch.int, !torch.int) -> !torch.list<int> %5038 = torch.aten.view %5026, %5037 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5038, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5039 = torch.aten.mm %5038, %5035 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5039, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_5842 = torch.constant.int 4 %int14336_5843 = torch.constant.int 14336 %5040 = torch.prim.ListConstruct %int4_5842, %294, %int14336_5843 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5041 = torch.aten.view %5039, %5040 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5041, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5042 = torch.aten.mul.Tensor %5034, %5041 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5042, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_5844 = torch.constant.int -2 %int-1_5845 = torch.constant.int -1 %5043 = torch.aten.transpose.int %216, %int-2_5844, %int-1_5845 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_5846 = torch.constant.int 1 %5044 = torch.aten.size.int %5033, %int1_5846 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_5847 = torch.constant.int 4 %5045 = torch.aten.mul.int %int4_5847, %5044 : !torch.int, !torch.int -> !torch.int %int14336_5848 = torch.constant.int 14336 %5046 = torch.prim.ListConstruct %5045, %int14336_5848 : (!torch.int, !torch.int) -> !torch.list<int> %5047 = torch.aten.view %5042, %5046 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5047, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %5048 = torch.aten.mm %5047, %5043 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5048, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5849 = torch.constant.int 4 %int4096_5850 = torch.constant.int 4096 %5049 = torch.prim.ListConstruct %int4_5849, %5044, %int4096_5850 : (!torch.int, !torch.int, 
!torch.int) -> !torch.list<int> %5050 = torch.aten.view %5048, %5049 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5050, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_5851 = torch.constant.int 1 %5051 = torch.aten.add.Tensor %5017, %5050, %int1_5851 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5051, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_5852 = torch.constant.int 6 %5052 = torch.prims.convert_element_type %5051, %int6_5852 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5052, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_5853 = torch.constant.int 2 %5053 = torch.aten.pow.Tensor_Scalar %5052, %int2_5853 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5053, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_5854 = torch.constant.int -1 %5054 = torch.prim.ListConstruct %int-1_5854 : (!torch.int) -> !torch.list<int> %true_5855 = torch.constant.bool true %none_5856 = torch.constant.none %5055 = torch.aten.mean.dim %5053, %5054, %true_5855, %none_5856 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5055, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_5857 = torch.constant.float 9.9999997473787516E-6 %int1_5858 = torch.constant.int 1 %5056 = torch.aten.add.Scalar %5055, %float9.999990e-06_5857, %int1_5858 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5056, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5057 = torch.aten.rsqrt %5056 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5057, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5058 = torch.aten.mul.Tensor %5052, %5057 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5058, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5059 = torch.aten.mul.Tensor %217, %5058 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5059, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_5859 = torch.constant.int 5 %5060 = torch.prims.convert_element_type %5059, %int5_5859 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5060, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5860 = torch.constant.int -2 %int-1_5861 = torch.constant.int -1 %5061 = torch.aten.transpose.int %218, %int-2_5860, %int-1_5861 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_5862 = torch.constant.int 4 %5062 = torch.aten.mul.int %int4_5862, %294 : !torch.int, !torch.int -> !torch.int %int4096_5863 = torch.constant.int 4096 %5063 = torch.prim.ListConstruct %5062, %int4096_5863 : (!torch.int, !torch.int) -> !torch.list<int> %5064 = torch.aten.view %5060, %5063 : !torch.vtensor<[4,?,4096],f16>, 
!torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5064, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5065 = torch.aten.mm %5064, %5061 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5065, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_5864 = torch.constant.int 4 %int4096_5865 = torch.constant.int 4096 %5066 = torch.prim.ListConstruct %int4_5864, %294, %int4096_5865 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5067 = torch.aten.view %5065, %5066 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5067, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_5866 = torch.constant.int -2 %int-1_5867 = torch.constant.int -1 %5068 = torch.aten.transpose.int %219, %int-2_5866, %int-1_5867 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_5868 = torch.constant.int 4 %5069 = torch.aten.mul.int %int4_5868, %294 : !torch.int, !torch.int -> !torch.int %int4096_5869 = torch.constant.int 4096 %5070 = torch.prim.ListConstruct %5069, %int4096_5869 : (!torch.int, !torch.int) -> !torch.list<int> %5071 = torch.aten.view %5060, %5070 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5071, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5072 = torch.aten.mm %5071, %5068 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5072, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_5870 = torch.constant.int 4 %int1024_5871 = torch.constant.int 1024 %5073 = torch.prim.ListConstruct %int4_5870, %294, %int1024_5871 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5074 = torch.aten.view %5072, %5073 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %5074, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_5872 = torch.constant.int -2 %int-1_5873 = torch.constant.int -1 %5075 = torch.aten.transpose.int %220, %int-2_5872, %int-1_5873 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_5874 = torch.constant.int 4 %5076 = torch.aten.mul.int %int4_5874, %294 : !torch.int, !torch.int -> !torch.int %int4096_5875 = torch.constant.int 4096 %5077 = torch.prim.ListConstruct %5076, %int4096_5875 : (!torch.int, !torch.int) -> !torch.list<int> %5078 = torch.aten.view %5060, %5077 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5078, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5079 = torch.aten.mm %5078, %5075 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5079, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_5876 = torch.constant.int 4 %int1024_5877 = torch.constant.int 1024 %5080 = torch.prim.ListConstruct %int4_5876, %294, %int1024_5877 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5081 = torch.aten.view %5079, %5080 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %5081, [%292], 
affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_5878 = torch.constant.int 4 %int32_5879 = torch.constant.int 32 %int128_5880 = torch.constant.int 128 %5082 = torch.prim.ListConstruct %int4_5878, %294, %int32_5879, %int128_5880 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5083 = torch.aten.view %5067, %5082 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5083, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_5881 = torch.constant.int 4 %int8_5882 = torch.constant.int 8 %int128_5883 = torch.constant.int 128 %5084 = torch.prim.ListConstruct %int4_5881, %294, %int8_5882, %int128_5883 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5085 = torch.aten.view %5074, %5084 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5085, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_5884 = torch.constant.int 4 %int8_5885 = torch.constant.int 8 %int128_5886 = torch.constant.int 128 %5086 = torch.prim.ListConstruct %int4_5884, %294, %int8_5885, %int128_5886 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5087 = torch.aten.view %5081, %5086 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5087, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_5887 = torch.constant.int 131072 %none_5888 = torch.constant.none %none_5889 = torch.constant.none %cpu_5890 = torch.constant.device "cpu" %false_5891 = torch.constant.bool false %5088 = torch.aten.arange %int131072_5887, %none_5888, %none_5889, %cpu_5890, %false_5891 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_5892 = torch.constant.int 0 %int128_5893 = torch.constant.int 128 %int2_5894 = torch.constant.int 2 %none_5895 = torch.constant.none %none_5896 = torch.constant.none %cpu_5897 = torch.constant.device "cpu" %false_5898 = torch.constant.bool false %5089 = torch.aten.arange.start_step %int0_5892, %int128_5893, %int2_5894, %none_5895, %none_5896, %cpu_5897, %false_5898 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_5899 = torch.constant.int 0 %int0_5900 = torch.constant.int 0 %int64_5901 = torch.constant.int 64 %int1_5902 = torch.constant.int 1 %5090 = torch.aten.slice.Tensor %5089, %int0_5899, %int0_5900, %int64_5901, %int1_5902 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_5903 = torch.constant.int 6 %5091 = torch.prims.convert_element_type %5090, %int6_5903 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_5904 = torch.constant.int 128 %5092 = torch.aten.div.Scalar %5091, %int128_5904 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_5905 = torch.constant.float 5.000000e+05 %5093 = torch.aten.pow.Scalar %float5.000000e05_5905, %5092 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5094 = torch.aten.reciprocal %5093 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_5906 = torch.constant.float 1.000000e+00 %5095 = torch.aten.mul.Scalar %5094, %float1.000000e00_5906 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> 
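// Above: the SwiGLU feed-forward %5042 = silu(x @ gate^T) * (x @ up^T) (gate %214,
// up %215, down %216), its residual add %5051, the next block's RMSNorm %5052..%5060
// (x / sqrt(mean(x^2) + eps), eps ~ 1e-5, scaled by %217), and the Q/K/V projections
// %5067 (4096 -> 4096) plus %5074 and %5081 (4096 -> 1024, i.e. 8 KV heads of 128).
// %5088..%5095 start re-materializing the RoPE frequencies theta_j = 500000^(-2j/128);
// note the table is rebuilt inline at every use rather than hoisted.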
%int131072_5907 = torch.constant.int 131072 %int1_5908 = torch.constant.int 1 %5096 = torch.prim.ListConstruct %int131072_5907, %int1_5908 : (!torch.int, !torch.int) -> !torch.list<int> %5097 = torch.aten.view %5088, %5096 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5098 = torch.aten.mul.Tensor %5097, %5095 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5099 = torch.aten.cos %5098 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5100 = torch.aten.sin %5098 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5101 = torch.aten.complex %5099, %5100 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_5909 = torch.constant.int 1 %5102 = torch.aten.size.int %5067, %int1_5909 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_5910 = torch.constant.int 0 %5103 = torch.aten.add.int %int0_5910, %5102 : !torch.int, !torch.int -> !torch.int %int0_5911 = torch.constant.int 0 %int0_5912 = torch.constant.int 0 %int1_5913 = torch.constant.int 1 %5104 = torch.aten.slice.Tensor %5101, %int0_5911, %int0_5912, %5103, %int1_5913 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5104, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_5914 = torch.constant.int 1 %int0_5915 = torch.constant.int 0 %int9223372036854775807_5916 = torch.constant.int 9223372036854775807 %int1_5917 = torch.constant.int 1 %5105 = torch.aten.slice.Tensor %5104, %int1_5914, %int0_5915, %int9223372036854775807_5916, %int1_5917 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5105, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_5918 = torch.constant.int 0 %5106 = torch.aten.unsqueeze %5105, %int0_5918 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5106, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_5919 = torch.constant.int 2 %5107 = torch.aten.unsqueeze %5106, %int2_5919 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5107, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_5920 = torch.constant.int 3 %int0_5921 = torch.constant.int 0 %int9223372036854775807_5922 = torch.constant.int 9223372036854775807 %int1_5923 = torch.constant.int 1 %5108 = torch.aten.slice.Tensor %5107, %int3_5920, %int0_5921, %int9223372036854775807_5922, %int1_5923 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5108, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5109 = torch_c.to_builtin_tensor %5083 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_5924 = arith.constant 1 : index %dim_5925 = tensor.dim %5109, %c1_5924 : tensor<4x?x32x128xf16> %5110 = flow.tensor.bitcast %5109 : tensor<4x?x32x128xf16>{%dim_5925} -> tensor<4x?x32x64xcomplex<f16>>{%dim_5925} %5111 = torch_c.from_builtin_tensor %5110 : tensor<4x?x32x64xcomplex<f16>> -> 
!torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %5111, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %5112 = torch.aten.mul.Tensor %5111, %5108 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %5112, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %5113 = torch_c.to_builtin_tensor %5112 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_5926 = arith.constant 1 : index %dim_5927 = tensor.dim %5113, %c1_5926 : tensor<4x?x32x64xcomplex<f32>> %5114 = flow.tensor.bitcast %5113 : tensor<4x?x32x64xcomplex<f32>>{%dim_5927} -> tensor<4x?x32x128xf32>{%dim_5927} %5115 = torch_c.from_builtin_tensor %5114 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %5115, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_5928 = torch.constant.int 5 %5116 = torch.prims.convert_element_type %5115, %int5_5928 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5116, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_5929 = torch.constant.int 131072 %none_5930 = torch.constant.none %none_5931 = torch.constant.none %cpu_5932 = torch.constant.device "cpu" %false_5933 = torch.constant.bool false %5117 = torch.aten.arange %int131072_5929, %none_5930, %none_5931, %cpu_5932, %false_5933 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_5934 = torch.constant.int 0 %int128_5935 = torch.constant.int 128 %int2_5936 = torch.constant.int 2 %none_5937 = torch.constant.none %none_5938 = torch.constant.none %cpu_5939 = torch.constant.device "cpu" %false_5940 = torch.constant.bool false %5118 = torch.aten.arange.start_step %int0_5934, %int128_5935, %int2_5936, %none_5937, %none_5938, %cpu_5939, %false_5940 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_5941 = torch.constant.int 0 %int0_5942 = torch.constant.int 0 %int64_5943 = torch.constant.int 64 %int1_5944 = torch.constant.int 1 %5119 = torch.aten.slice.Tensor %5118, %int0_5941, %int0_5942, %int64_5943, %int1_5944 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_5945 = torch.constant.int 6 %5120 = torch.prims.convert_element_type %5119, %int6_5945 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_5946 = torch.constant.int 128 %5121 = torch.aten.div.Scalar %5120, %int128_5946 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_5947 = torch.constant.float 5.000000e+05 %5122 = torch.aten.pow.Scalar %float5.000000e05_5947, %5121 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5123 = torch.aten.reciprocal %5122 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_5948 = torch.constant.float 1.000000e+00 %5124 = torch.aten.mul.Scalar %5123, %float1.000000e00_5948 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_5949 = torch.constant.int 131072 %int1_5950 = torch.constant.int 1 %5125 = torch.prim.ListConstruct %int131072_5949, %int1_5950 : (!torch.int, !torch.int) -> !torch.list<int> %5126 = torch.aten.view %5117, %5125 : 
!torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5127 = torch.aten.mul.Tensor %5126, %5124 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5128 = torch.aten.cos %5127 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5129 = torch.aten.sin %5127 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5130 = torch.aten.complex %5128, %5129 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_5951 = torch.constant.int 1 %5131 = torch.aten.size.int %5074, %int1_5951 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_5952 = torch.constant.int 0 %5132 = torch.aten.add.int %int0_5952, %5131 : !torch.int, !torch.int -> !torch.int %int0_5953 = torch.constant.int 0 %int0_5954 = torch.constant.int 0 %int1_5955 = torch.constant.int 1 %5133 = torch.aten.slice.Tensor %5130, %int0_5953, %int0_5954, %5132, %int1_5955 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5133, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_5956 = torch.constant.int 1 %int0_5957 = torch.constant.int 0 %int9223372036854775807_5958 = torch.constant.int 9223372036854775807 %int1_5959 = torch.constant.int 1 %5134 = torch.aten.slice.Tensor %5133, %int1_5956, %int0_5957, %int9223372036854775807_5958, %int1_5959 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5134, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_5960 = torch.constant.int 0 %5135 = torch.aten.unsqueeze %5134, %int0_5960 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5135, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_5961 = torch.constant.int 2 %5136 = torch.aten.unsqueeze %5135, %int2_5961 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5136, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_5962 = torch.constant.int 3 %int0_5963 = torch.constant.int 0 %int9223372036854775807_5964 = torch.constant.int 9223372036854775807 %int1_5965 = torch.constant.int 1 %5137 = torch.aten.slice.Tensor %5136, %int3_5962, %int0_5963, %int9223372036854775807_5964, %int1_5965 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5137, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5138 = torch_c.to_builtin_tensor %5085 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_5966 = arith.constant 1 : index %dim_5967 = tensor.dim %5138, %c1_5966 : tensor<4x?x8x128xf16> %5139 = flow.tensor.bitcast %5138 : tensor<4x?x8x128xf16>{%dim_5967} -> tensor<4x?x8x64xcomplex<f16>>{%dim_5967} %5140 = torch_c.from_builtin_tensor %5139 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %5140, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %5141 = torch.aten.mul.Tensor %5140, %5137 : !torch.vtensor<[4,?,8,64],complex<f16>>, 
!torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %5141, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %5142 = torch_c.to_builtin_tensor %5141 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_5968 = arith.constant 1 : index %dim_5969 = tensor.dim %5142, %c1_5968 : tensor<4x?x8x64xcomplex<f32>> %5143 = flow.tensor.bitcast %5142 : tensor<4x?x8x64xcomplex<f32>>{%dim_5969} -> tensor<4x?x8x128xf32>{%dim_5969} %5144 = torch_c.from_builtin_tensor %5143 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %5144, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_5970 = torch.constant.int 5 %5145 = torch.prims.convert_element_type %5144, %int5_5970 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5145, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_5971 = torch.constant.int 64 %5146 = torch.aten.mul.Scalar %arg2, %int64_5971 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5146, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int48 = torch.constant.int 48 %int1_5972 = torch.constant.int 1 %5147 = torch.aten.add.Scalar %5146, %int48, %int1_5972 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5147, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_5973 = torch.constant.int 4 %int32_5974 = torch.constant.int 32 %int8_5975 = torch.constant.int 8 %int128_5976 = torch.constant.int 128 %5148 = torch.prim.ListConstruct %int4_5973, %425, %int32_5974, %int8_5975, %int128_5976 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5149 = torch.aten.view %5145, %5148 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5149, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_5977 = torch.constant.int 4 %5150 = torch.aten.mul.int %int4_5977, %425 : !torch.int, !torch.int -> !torch.int %int32_5978 = torch.constant.int 32 %int8_5979 = torch.constant.int 8 %int128_5980 = torch.constant.int 128 %5151 = torch.prim.ListConstruct %5150, %int32_5978, %int8_5979, %int128_5980 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5152 = torch.aten.view %5149, %5151 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5152, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_5981 = torch.constant.int 4 %5153 = torch.aten.mul.int %int4_5981, %425 : !torch.int, !torch.int -> !torch.int %5154 = torch.prim.ListConstruct %5153 : (!torch.int) -> !torch.list<int> %5155 = torch.aten.view %5147, %5154 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %5155, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_5982 = torch.constant.int 32 %int2_5983 = torch.constant.int 2 %int32_5984 = torch.constant.int 32 %int8_5985 = torch.constant.int 8 %int128_5986 = torch.constant.int 128 %5156 = torch.prim.ListConstruct %416, %int32_5982, %int2_5983, %int32_5984, %int8_5985, %int128_5986 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) 
-> !torch.list<int> %5157 = torch.aten.view %4989, %5156 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5157, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_5987 = torch.constant.int 32 %5158 = torch.aten.mul.int %416, %int32_5987 : !torch.int, !torch.int -> !torch.int %int2_5988 = torch.constant.int 2 %5159 = torch.aten.mul.int %5158, %int2_5988 : !torch.int, !torch.int -> !torch.int %int32_5989 = torch.constant.int 32 %int8_5990 = torch.constant.int 8 %int128_5991 = torch.constant.int 128 %5160 = torch.prim.ListConstruct %5159, %int32_5989, %int8_5990, %int128_5991 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5161 = torch.aten.view %5157, %5160 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5161, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %5162 = torch.prim.ListConstruct %5155 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_5992 = torch.constant.bool false %5163 = torch.aten.index_put %5161, %5162, %5152, %false_5992 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5163, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_5993 = torch.constant.int 32 %int2_5994 = torch.constant.int 2 %int32_5995 = torch.constant.int 32 %int8_5996 = torch.constant.int 8 %int128_5997 = torch.constant.int 128 %5164 = torch.prim.ListConstruct %416, %int32_5993, %int2_5994, %int32_5995, %int8_5996, %int128_5997 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5165 = torch.aten.view %5163, %5164 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5165, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_5998 = torch.constant.int 2097152 %5166 = torch.prim.ListConstruct %416, %int2097152_5998 : (!torch.int, !torch.int) -> !torch.list<int> %5167 = torch.aten.view %5165, %5166 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5167, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_5999 = torch.constant.int 32 %int2_6000 = torch.constant.int 2 %int32_6001 = torch.constant.int 32 %int8_6002 = torch.constant.int 8 %int128_6003 = torch.constant.int 128 %5168 = torch.prim.ListConstruct %416, %int32_5999, %int2_6000, %int32_6001, %int8_6002, %int128_6003 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5169 = torch.aten.view %5167, %5168 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5169, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_6004 = torch.constant.int 32 %int8_6005 = torch.constant.int 8 %int128_6006 = torch.constant.int 128 %5170 = torch.prim.ListConstruct %5159, %int32_6004, %int8_6005, %int128_6006 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5171 = torch.aten.view %5169, %5170 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> 
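// %5116 / %5145 above are the RoPE-rotated Q (32 heads) and K (8 heads). %5147 =
// %arg2 * 64 + 48 forms flat slot indices into the paged KV cache viewed as
// [pages, 32, 2, 32, 8, 128]; offset 48 = 2*24 would place this K write in layer 24's
// K slot (the following V write uses offset 49), though the layer numbering is
// inferred from the stride, not stated in the IR. %5163 scatters K via index_put,
// then the cache is flattened back to [?,2097152] (%5167) and re-viewed as
// [?*64,32,8,128] (%5171) ahead of the V update.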
torch.bind_symbolic_shape %5171, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_6007 = torch.constant.int 4 %int32_6008 = torch.constant.int 32 %int8_6009 = torch.constant.int 8 %int128_6010 = torch.constant.int 128 %5172 = torch.prim.ListConstruct %int4_6007, %425, %int32_6008, %int8_6009, %int128_6010 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5173 = torch.aten.view %5087, %5172 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5173, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_6011 = torch.constant.int 4 %5174 = torch.aten.mul.int %int4_6011, %425 : !torch.int, !torch.int -> !torch.int %int32_6012 = torch.constant.int 32 %int8_6013 = torch.constant.int 8 %int128_6014 = torch.constant.int 128 %5175 = torch.prim.ListConstruct %5174, %int32_6012, %int8_6013, %int128_6014 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5176 = torch.aten.view %5173, %5175 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5176, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_6015 = torch.constant.int 1 %int1_6016 = torch.constant.int 1 %5177 = torch.aten.add.Scalar %5147, %int1_6015, %int1_6016 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5177, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_6017 = torch.constant.int 4 %5178 = torch.aten.mul.int %int4_6017, %425 : !torch.int, !torch.int -> !torch.int %5179 = torch.prim.ListConstruct %5178 : (!torch.int) -> !torch.list<int> %5180 = torch.aten.view %5177, %5179 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %5180, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %5181 = torch.prim.ListConstruct %5180 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_6018 = torch.constant.bool false %5182 = torch.aten.index_put %5171, %5181, %5176, %false_6018 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5182, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_6019 = torch.constant.int 32 %int2_6020 = torch.constant.int 2 %int32_6021 = torch.constant.int 32 %int8_6022 = torch.constant.int 8 %int128_6023 = torch.constant.int 128 %5183 = torch.prim.ListConstruct %416, %int32_6019, %int2_6020, %int32_6021, %int8_6022, %int128_6023 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5184 = torch.aten.view %5182, %5183 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5184, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_6024 = torch.constant.int 2097152 %5185 = torch.prim.ListConstruct %416, %int2097152_6024 : (!torch.int, !torch.int) -> !torch.list<int> %5186 = torch.aten.view %5184, %5185 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5186, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_6025 = torch.constant.int -2 %5187 
= torch.aten.unsqueeze %5145, %int-2_6025 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5187, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_6026 = torch.constant.int 4 %int8_6027 = torch.constant.int 8 %int4_6028 = torch.constant.int 4 %int128_6029 = torch.constant.int 128 %5188 = torch.prim.ListConstruct %int4_6026, %5131, %int8_6027, %int4_6028, %int128_6029 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_6030 = torch.constant.bool false %5189 = torch.aten.expand %5187, %5188, %false_6030 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5189, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_6031 = torch.constant.int 0 %5190 = torch.aten.clone %5189, %int0_6031 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5190, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_6032 = torch.constant.int 4 %int32_6033 = torch.constant.int 32 %int128_6034 = torch.constant.int 128 %5191 = torch.prim.ListConstruct %int4_6032, %5131, %int32_6033, %int128_6034 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5192 = torch.aten._unsafe_view %5190, %5191 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5192, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_6035 = torch.constant.int -2 %5193 = torch.aten.unsqueeze %5087, %int-2_6035 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5193, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_6036 = torch.constant.int 1 %5194 = torch.aten.size.int %5081, %int1_6036 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_6037 = torch.constant.int 4 %int8_6038 = torch.constant.int 8 %int4_6039 = torch.constant.int 4 %int128_6040 = torch.constant.int 128 %5195 = torch.prim.ListConstruct %int4_6037, %5194, %int8_6038, %int4_6039, %int128_6040 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_6041 = torch.constant.bool false %5196 = torch.aten.expand %5193, %5195, %false_6041 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5196, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_6042 = torch.constant.int 0 %5197 = torch.aten.clone %5196, %int0_6042 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5197, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_6043 = torch.constant.int 4 %int32_6044 = torch.constant.int 32 %int128_6045 = torch.constant.int 128 %5198 = torch.prim.ListConstruct %int4_6043, %5194, %int32_6044, %int128_6045 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5199 = torch.aten._unsafe_view %5197, %5198 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5199, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_6046 = 
torch.constant.int 1 %int2_6047 = torch.constant.int 2 %5200 = torch.aten.transpose.int %5116, %int1_6046, %int2_6047 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5200, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6048 = torch.constant.int 1 %int2_6049 = torch.constant.int 2 %5201 = torch.aten.transpose.int %5192, %int1_6048, %int2_6049 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5201, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6050 = torch.constant.int 1 %int2_6051 = torch.constant.int 2 %5202 = torch.aten.transpose.int %5199, %int1_6050, %int2_6051 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5202, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_6052 = torch.constant.float 0.000000e+00 %false_6053 = torch.constant.bool false %none_6054 = torch.constant.none %5203:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5200, %5201, %5202, %float0.000000e00_6052, %false_6053, %320, %none_6054) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %5203#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6055 = torch.constant.int 1 %int2_6056 = torch.constant.int 2 %5204 = torch.aten.transpose.int %5203#0, %int1_6055, %int2_6056 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5204, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_6057 = torch.constant.int 4 %int4096_6058 = torch.constant.int 4096 %5205 = torch.prim.ListConstruct %int4_6057, %5102, %int4096_6058 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5206 = torch.aten.view %5204, %5205 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5206, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6059 = torch.constant.int -2 %int-1_6060 = torch.constant.int -1 %5207 = torch.aten.transpose.int %221, %int-2_6059, %int-1_6060 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_6061 = torch.constant.int 4 %5208 = torch.aten.mul.int %int4_6061, %5102 : !torch.int, !torch.int -> !torch.int %int4096_6062 = torch.constant.int 4096 %5209 = torch.prim.ListConstruct %5208, %int4096_6062 : (!torch.int, !torch.int) -> !torch.list<int> %5210 = torch.aten.view %5206, %5209 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5210, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5211 = torch.aten.mm %5210, %5207 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5211, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6063 = torch.constant.int 4 %int4096_6064 = torch.constant.int 4096 %5212 = torch.prim.ListConstruct %int4_6063, %5102, 
%int4096_6064 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5213 = torch.aten.view %5211, %5212 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5213, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_6065 = torch.constant.int 1 %5214 = torch.aten.add.Tensor %5051, %5213, %int1_6065 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5214, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_6066 = torch.constant.int 6 %5215 = torch.prims.convert_element_type %5214, %int6_6066 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5215, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_6067 = torch.constant.int 2 %5216 = torch.aten.pow.Tensor_Scalar %5215, %int2_6067 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5216, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_6068 = torch.constant.int -1 %5217 = torch.prim.ListConstruct %int-1_6068 : (!torch.int) -> !torch.list<int> %true_6069 = torch.constant.bool true %none_6070 = torch.constant.none %5218 = torch.aten.mean.dim %5216, %5217, %true_6069, %none_6070 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5218, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_6071 = torch.constant.float 9.9999997473787516E-6 %int1_6072 = torch.constant.int 1 %5219 = torch.aten.add.Scalar %5218, %float9.999990e-06_6071, %int1_6072 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5219, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5220 = torch.aten.rsqrt %5219 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5220, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5221 = torch.aten.mul.Tensor %5215, %5220 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5221, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5222 = torch.aten.mul.Tensor %222, %5221 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5222, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_6073 = torch.constant.int 5 %5223 = torch.prims.convert_element_type %5222, %int5_6073 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5223, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6074 = torch.constant.int -2 %int-1_6075 = torch.constant.int -1 %5224 = torch.aten.transpose.int %223, %int-2_6074, %int-1_6075 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_6076 = torch.constant.int 4 %5225 = torch.aten.mul.int %int4_6076, %294 : !torch.int, !torch.int -> !torch.int %int4096_6077 = torch.constant.int 4096 %5226 = torch.prim.ListConstruct %5225, %int4096_6077 : (!torch.int, !torch.int) -> !torch.list<int> %5227 = torch.aten.view %5223, %5226 
: !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5227, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5228 = torch.aten.mm %5227, %5224 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5228, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_6078 = torch.constant.int 4 %int14336_6079 = torch.constant.int 14336 %5229 = torch.prim.ListConstruct %int4_6078, %294, %int14336_6079 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5230 = torch.aten.view %5228, %5229 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5230, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5231 = torch.aten.silu %5230 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5231, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_6080 = torch.constant.int -2 %int-1_6081 = torch.constant.int -1 %5232 = torch.aten.transpose.int %224, %int-2_6080, %int-1_6081 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_6082 = torch.constant.int 4 %5233 = torch.aten.mul.int %int4_6082, %294 : !torch.int, !torch.int -> !torch.int %int4096_6083 = torch.constant.int 4096 %5234 = torch.prim.ListConstruct %5233, %int4096_6083 : (!torch.int, !torch.int) -> !torch.list<int> %5235 = torch.aten.view %5223, %5234 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5235, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5236 = torch.aten.mm %5235, %5232 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5236, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_6084 = torch.constant.int 4 %int14336_6085 = torch.constant.int 14336 %5237 = torch.prim.ListConstruct %int4_6084, %294, %int14336_6085 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5238 = torch.aten.view %5236, %5237 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5238, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5239 = torch.aten.mul.Tensor %5231, %5238 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5239, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_6086 = torch.constant.int -2 %int-1_6087 = torch.constant.int -1 %5240 = torch.aten.transpose.int %225, %int-2_6086, %int-1_6087 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_6088 = torch.constant.int 1 %5241 = torch.aten.size.int %5230, %int1_6088 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_6089 = torch.constant.int 4 %5242 = torch.aten.mul.int %int4_6089, %5241 : !torch.int, !torch.int -> !torch.int %int14336_6090 = torch.constant.int 14336 %5243 = torch.prim.ListConstruct %5242, %int14336_6090 : (!torch.int, !torch.int) -> !torch.list<int> %5244 = torch.aten.view %5239, %5243 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5244, 
[%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %5245 = torch.aten.mm %5244, %5240 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5245, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6091 = torch.constant.int 4 %int4096_6092 = torch.constant.int 4096 %5246 = torch.prim.ListConstruct %int4_6091, %5241, %int4096_6092 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5247 = torch.aten.view %5245, %5246 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5247, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_6093 = torch.constant.int 1 %5248 = torch.aten.add.Tensor %5214, %5247, %int1_6093 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5248, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_6094 = torch.constant.int 6 %5249 = torch.prims.convert_element_type %5248, %int6_6094 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5249, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_6095 = torch.constant.int 2 %5250 = torch.aten.pow.Tensor_Scalar %5249, %int2_6095 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5250, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_6096 = torch.constant.int -1 %5251 = torch.prim.ListConstruct %int-1_6096 : (!torch.int) -> !torch.list<int> %true_6097 = torch.constant.bool true %none_6098 = torch.constant.none %5252 = torch.aten.mean.dim %5250, %5251, %true_6097, %none_6098 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5252, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_6099 = torch.constant.float 9.9999997473787516E-6 %int1_6100 = torch.constant.int 1 %5253 = torch.aten.add.Scalar %5252, %float9.999990e-06_6099, %int1_6100 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5253, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5254 = torch.aten.rsqrt %5253 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5254, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5255 = torch.aten.mul.Tensor %5249, %5254 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5255, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5256 = torch.aten.mul.Tensor %226, %5255 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5256, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_6101 = torch.constant.int 5 %5257 = torch.prims.convert_element_type %5256, %int5_6101 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5257, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6102 = torch.constant.int -2 %int-1_6103 = torch.constant.int -1 
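// The ops above close out one decoder layer and open the next: the attention output
// projection feeds a residual add (%5214), and the f16 -> f32 -> f16 round-trip that
// follows (%5215 through %5223, and again %5249 through %5257) is the usual RMSNorm
// lowering: square, mean over the hidden dim, add epsilon, rsqrt, scale by the
// per-channel weight. A minimal PyTorch sketch of what this op sequence computes
// (the function name rms_norm is ours; the epsilon is the constant in the IR, ~1e-5):
```python
import torch

def rms_norm(x: torch.Tensor, weight: torch.Tensor,
             eps: float = 9.9999997473787516e-06) -> torch.Tensor:
    h = x.to(torch.float32)                # prims.convert_element_type (dtype 6 = f32)
    var = h.pow(2).mean(-1, keepdim=True)  # aten.pow.Tensor_Scalar + aten.mean.dim
    h = h * torch.rsqrt(var + eps)         # aten.add.Scalar + aten.rsqrt + aten.mul
    return (weight * h).to(torch.float16)  # aten.mul.Tensor with the f32 norm weight,
                                           # then cast back (dtype 5 = f16)
```
// Keeping the statistics in f32 while the residual stream stays f16 is why every norm
// site in this module pairs two convert_element_type ops around the arithmetic.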
%5258 = torch.aten.transpose.int %227, %int-2_6102, %int-1_6103 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_6104 = torch.constant.int 4 %5259 = torch.aten.mul.int %int4_6104, %294 : !torch.int, !torch.int -> !torch.int %int4096_6105 = torch.constant.int 4096 %5260 = torch.prim.ListConstruct %5259, %int4096_6105 : (!torch.int, !torch.int) -> !torch.list<int> %5261 = torch.aten.view %5257, %5260 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5261, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5262 = torch.aten.mm %5261, %5258 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5262, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6106 = torch.constant.int 4 %int4096_6107 = torch.constant.int 4096 %5263 = torch.prim.ListConstruct %int4_6106, %294, %int4096_6107 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5264 = torch.aten.view %5262, %5263 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5264, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6108 = torch.constant.int -2 %int-1_6109 = torch.constant.int -1 %5265 = torch.aten.transpose.int %228, %int-2_6108, %int-1_6109 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_6110 = torch.constant.int 4 %5266 = torch.aten.mul.int %int4_6110, %294 : !torch.int, !torch.int -> !torch.int %int4096_6111 = torch.constant.int 4096 %5267 = torch.prim.ListConstruct %5266, %int4096_6111 : (!torch.int, !torch.int) -> !torch.list<int> %5268 = torch.aten.view %5257, %5267 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5268, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5269 = torch.aten.mm %5268, %5265 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5269, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_6112 = torch.constant.int 4 %int1024_6113 = torch.constant.int 1024 %5270 = torch.prim.ListConstruct %int4_6112, %294, %int1024_6113 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5271 = torch.aten.view %5269, %5270 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %5271, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_6114 = torch.constant.int -2 %int-1_6115 = torch.constant.int -1 %5272 = torch.aten.transpose.int %229, %int-2_6114, %int-1_6115 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_6116 = torch.constant.int 4 %5273 = torch.aten.mul.int %int4_6116, %294 : !torch.int, !torch.int -> !torch.int %int4096_6117 = torch.constant.int 4096 %5274 = torch.prim.ListConstruct %5273, %int4096_6117 : (!torch.int, !torch.int) -> !torch.list<int> %5275 = torch.aten.view %5257, %5274 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5275, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5276 = torch.aten.mm %5275, %5272 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> 
!torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5276, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_6118 = torch.constant.int 4 %int1024_6119 = torch.constant.int 1024 %5277 = torch.prim.ListConstruct %int4_6118, %294, %int1024_6119 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5278 = torch.aten.view %5276, %5277 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %5278, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_6120 = torch.constant.int 4 %int32_6121 = torch.constant.int 32 %int128_6122 = torch.constant.int 128 %5279 = torch.prim.ListConstruct %int4_6120, %294, %int32_6121, %int128_6122 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5280 = torch.aten.view %5264, %5279 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5280, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_6123 = torch.constant.int 4 %int8_6124 = torch.constant.int 8 %int128_6125 = torch.constant.int 128 %5281 = torch.prim.ListConstruct %int4_6123, %294, %int8_6124, %int128_6125 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5282 = torch.aten.view %5271, %5281 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5282, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_6126 = torch.constant.int 4 %int8_6127 = torch.constant.int 8 %int128_6128 = torch.constant.int 128 %5283 = torch.prim.ListConstruct %int4_6126, %294, %int8_6127, %int128_6128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5284 = torch.aten.view %5278, %5283 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5284, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_6129 = torch.constant.int 131072 %none_6130 = torch.constant.none %none_6131 = torch.constant.none %cpu_6132 = torch.constant.device "cpu" %false_6133 = torch.constant.bool false %5285 = torch.aten.arange %int131072_6129, %none_6130, %none_6131, %cpu_6132, %false_6133 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_6134 = torch.constant.int 0 %int128_6135 = torch.constant.int 128 %int2_6136 = torch.constant.int 2 %none_6137 = torch.constant.none %none_6138 = torch.constant.none %cpu_6139 = torch.constant.device "cpu" %false_6140 = torch.constant.bool false %5286 = torch.aten.arange.start_step %int0_6134, %int128_6135, %int2_6136, %none_6137, %none_6138, %cpu_6139, %false_6140 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_6141 = torch.constant.int 0 %int0_6142 = torch.constant.int 0 %int64_6143 = torch.constant.int 64 %int1_6144 = torch.constant.int 1 %5287 = torch.aten.slice.Tensor %5286, %int0_6141, %int0_6142, %int64_6143, %int1_6144 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_6145 = torch.constant.int 6 %5288 = torch.prims.convert_element_type %5287, %int6_6145 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_6146 = torch.constant.int 128 %5289 = torch.aten.div.Scalar %5288, %int128_6146 : !torch.vtensor<[64],f32>, !torch.int 
-> !torch.vtensor<[64],f32> %float5.000000e05_6147 = torch.constant.float 5.000000e+05 %5290 = torch.aten.pow.Scalar %float5.000000e05_6147, %5289 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5291 = torch.aten.reciprocal %5290 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_6148 = torch.constant.float 1.000000e+00 %5292 = torch.aten.mul.Scalar %5291, %float1.000000e00_6148 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_6149 = torch.constant.int 131072 %int1_6150 = torch.constant.int 1 %5293 = torch.prim.ListConstruct %int131072_6149, %int1_6150 : (!torch.int, !torch.int) -> !torch.list<int> %5294 = torch.aten.view %5285, %5293 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5295 = torch.aten.mul.Tensor %5294, %5292 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5296 = torch.aten.cos %5295 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5297 = torch.aten.sin %5295 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5298 = torch.aten.complex %5296, %5297 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_6151 = torch.constant.int 1 %5299 = torch.aten.size.int %5264, %int1_6151 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_6152 = torch.constant.int 0 %5300 = torch.aten.add.int %int0_6152, %5299 : !torch.int, !torch.int -> !torch.int %int0_6153 = torch.constant.int 0 %int0_6154 = torch.constant.int 0 %int1_6155 = torch.constant.int 1 %5301 = torch.aten.slice.Tensor %5298, %int0_6153, %int0_6154, %5300, %int1_6155 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5301, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_6156 = torch.constant.int 1 %int0_6157 = torch.constant.int 0 %int9223372036854775807_6158 = torch.constant.int 9223372036854775807 %int1_6159 = torch.constant.int 1 %5302 = torch.aten.slice.Tensor %5301, %int1_6156, %int0_6157, %int9223372036854775807_6158, %int1_6159 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5302, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_6160 = torch.constant.int 0 %5303 = torch.aten.unsqueeze %5302, %int0_6160 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5303, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_6161 = torch.constant.int 2 %5304 = torch.aten.unsqueeze %5303, %int2_6161 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5304, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_6162 = torch.constant.int 3 %int0_6163 = torch.constant.int 0 %int9223372036854775807_6164 = torch.constant.int 9223372036854775807 %int1_6165 = torch.constant.int 1 %5305 = torch.aten.slice.Tensor %5304, %int3_6162, %int0_6163, %int9223372036854775807_6164, %int1_6165 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5305, [%292], affine_map<()[s0] -> 
(1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5306 = torch_c.to_builtin_tensor %5280 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_6166 = arith.constant 1 : index %dim_6167 = tensor.dim %5306, %c1_6166 : tensor<4x?x32x128xf16> %5307 = flow.tensor.bitcast %5306 : tensor<4x?x32x128xf16>{%dim_6167} -> tensor<4x?x32x64xcomplex<f16>>{%dim_6167} %5308 = torch_c.from_builtin_tensor %5307 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %5308, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %5309 = torch.aten.mul.Tensor %5308, %5305 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %5309, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %5310 = torch_c.to_builtin_tensor %5309 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_6168 = arith.constant 1 : index %dim_6169 = tensor.dim %5310, %c1_6168 : tensor<4x?x32x64xcomplex<f32>> %5311 = flow.tensor.bitcast %5310 : tensor<4x?x32x64xcomplex<f32>>{%dim_6169} -> tensor<4x?x32x128xf32>{%dim_6169} %5312 = torch_c.from_builtin_tensor %5311 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %5312, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_6170 = torch.constant.int 5 %5313 = torch.prims.convert_element_type %5312, %int5_6170 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5313, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_6171 = torch.constant.int 131072 %none_6172 = torch.constant.none %none_6173 = torch.constant.none %cpu_6174 = torch.constant.device "cpu" %false_6175 = torch.constant.bool false %5314 = torch.aten.arange %int131072_6171, %none_6172, %none_6173, %cpu_6174, %false_6175 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_6176 = torch.constant.int 0 %int128_6177 = torch.constant.int 128 %int2_6178 = torch.constant.int 2 %none_6179 = torch.constant.none %none_6180 = torch.constant.none %cpu_6181 = torch.constant.device "cpu" %false_6182 = torch.constant.bool false %5315 = torch.aten.arange.start_step %int0_6176, %int128_6177, %int2_6178, %none_6179, %none_6180, %cpu_6181, %false_6182 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_6183 = torch.constant.int 0 %int0_6184 = torch.constant.int 0 %int64_6185 = torch.constant.int 64 %int1_6186 = torch.constant.int 1 %5316 = torch.aten.slice.Tensor %5315, %int0_6183, %int0_6184, %int64_6185, %int1_6186 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_6187 = torch.constant.int 6 %5317 = torch.prims.convert_element_type %5316, %int6_6187 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_6188 = torch.constant.int 128 %5318 = torch.aten.div.Scalar %5317, %int128_6188 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_6189 = torch.constant.float 5.000000e+05 %5319 = torch.aten.pow.Scalar %float5.000000e05_6189, %5318 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5320 = torch.aten.reciprocal %5319 : !torch.vtensor<[64],f32> 
-> !torch.vtensor<[64],f32> %float1.000000e00_6190 = torch.constant.float 1.000000e+00 %5321 = torch.aten.mul.Scalar %5320, %float1.000000e00_6190 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_6191 = torch.constant.int 131072 %int1_6192 = torch.constant.int 1 %5322 = torch.prim.ListConstruct %int131072_6191, %int1_6192 : (!torch.int, !torch.int) -> !torch.list<int> %5323 = torch.aten.view %5314, %5322 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5324 = torch.aten.mul.Tensor %5323, %5321 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5325 = torch.aten.cos %5324 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5326 = torch.aten.sin %5324 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5327 = torch.aten.complex %5325, %5326 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_6193 = torch.constant.int 1 %5328 = torch.aten.size.int %5271, %int1_6193 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_6194 = torch.constant.int 0 %5329 = torch.aten.add.int %int0_6194, %5328 : !torch.int, !torch.int -> !torch.int %int0_6195 = torch.constant.int 0 %int0_6196 = torch.constant.int 0 %int1_6197 = torch.constant.int 1 %5330 = torch.aten.slice.Tensor %5327, %int0_6195, %int0_6196, %5329, %int1_6197 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5330, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_6198 = torch.constant.int 1 %int0_6199 = torch.constant.int 0 %int9223372036854775807_6200 = torch.constant.int 9223372036854775807 %int1_6201 = torch.constant.int 1 %5331 = torch.aten.slice.Tensor %5330, %int1_6198, %int0_6199, %int9223372036854775807_6200, %int1_6201 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5331, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_6202 = torch.constant.int 0 %5332 = torch.aten.unsqueeze %5331, %int0_6202 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5332, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_6203 = torch.constant.int 2 %5333 = torch.aten.unsqueeze %5332, %int2_6203 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5333, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_6204 = torch.constant.int 3 %int0_6205 = torch.constant.int 0 %int9223372036854775807_6206 = torch.constant.int 9223372036854775807 %int1_6207 = torch.constant.int 1 %5334 = torch.aten.slice.Tensor %5333, %int3_6204, %int0_6205, %int9223372036854775807_6206, %int1_6207 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5334, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5335 = torch_c.to_builtin_tensor %5282 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_6208 = arith.constant 1 : index %dim_6209 = tensor.dim %5335, %c1_6208 : tensor<4x?x8x128xf16> %5336 = 
flow.tensor.bitcast %5335 : tensor<4x?x8x128xf16>{%dim_6209} -> tensor<4x?x8x64xcomplex<f16>>{%dim_6209} %5337 = torch_c.from_builtin_tensor %5336 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %5337, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %5338 = torch.aten.mul.Tensor %5337, %5334 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %5338, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %5339 = torch_c.to_builtin_tensor %5338 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_6210 = arith.constant 1 : index %dim_6211 = tensor.dim %5339, %c1_6210 : tensor<4x?x8x64xcomplex<f32>> %5340 = flow.tensor.bitcast %5339 : tensor<4x?x8x64xcomplex<f32>>{%dim_6211} -> tensor<4x?x8x128xf32>{%dim_6211} %5341 = torch_c.from_builtin_tensor %5340 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %5341, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_6212 = torch.constant.int 5 %5342 = torch.prims.convert_element_type %5341, %int5_6212 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5342, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_6213 = torch.constant.int 64 %5343 = torch.aten.mul.Scalar %arg2, %int64_6213 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5343, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int50 = torch.constant.int 50 %int1_6214 = torch.constant.int 1 %5344 = torch.aten.add.Scalar %5343, %int50, %int1_6214 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5344, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_6215 = torch.constant.int 4 %int32_6216 = torch.constant.int 32 %int8_6217 = torch.constant.int 8 %int128_6218 = torch.constant.int 128 %5345 = torch.prim.ListConstruct %int4_6215, %425, %int32_6216, %int8_6217, %int128_6218 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5346 = torch.aten.view %5342, %5345 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5346, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_6219 = torch.constant.int 4 %5347 = torch.aten.mul.int %int4_6219, %425 : !torch.int, !torch.int -> !torch.int %int32_6220 = torch.constant.int 32 %int8_6221 = torch.constant.int 8 %int128_6222 = torch.constant.int 128 %5348 = torch.prim.ListConstruct %5347, %int32_6220, %int8_6221, %int128_6222 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5349 = torch.aten.view %5346, %5348 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5349, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_6223 = torch.constant.int 4 %5350 = torch.aten.mul.int %int4_6223, %425 : !torch.int, !torch.int -> !torch.int %5351 = torch.prim.ListConstruct %5350 : (!torch.int) -> !torch.list<int> %5352 = torch.aten.view %5344, %5351 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> 
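// Twice per layer the module rebuilds the same rotary table (%5285-%5305 for Q,
// %5314-%5334 for K) and applies it by bitcasting adjacent f16 pairs to complex
// numbers (flow.tensor.bitcast) and multiplying by cos + i*sin. A sketch of the
// computation, assuming the constants read off the IR: base theta 5.0e5, head dim 128,
// 131072 max positions. Function names are ours, and the IR bitcasts in f16 where
// this sketch upcasts first:
```python
import torch

def rope_table(max_pos: int = 131072, head_dim: int = 128,
               theta: float = 5.0e5) -> torch.Tensor:
    # inv_freq = 1 / theta^(2i/d): aten.pow.Scalar followed by aten.reciprocal
    inv_freq = 1.0 / theta ** (torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim)
    angles = torch.arange(max_pos, dtype=torch.float32)[:, None] * inv_freq  # [131072, 64]
    return torch.polar(torch.ones_like(angles), angles)  # cos + i*sin, complex64

def apply_rope(x: torch.Tensor, table: torch.Tensor) -> torch.Tensor:
    # x: [batch, seq, heads, 128] f16; pairs (x[2i], x[2i+1]) act as (re, im),
    # matching the bitcast to tensor<...x64xcomplex<f16>> in the IR.
    seq = x.shape[1]
    xc = torch.view_as_complex(x.float().reshape(*x.shape[:-1], -1, 2))
    out = xc * table[:seq].view(1, seq, 1, -1)   # broadcast against [1, seq, 1, 64]
    return torch.view_as_real(out).flatten(-2).to(torch.float16)
```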
torch.bind_symbolic_shape %5352, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_6224 = torch.constant.int 32 %int2_6225 = torch.constant.int 2 %int32_6226 = torch.constant.int 32 %int8_6227 = torch.constant.int 8 %int128_6228 = torch.constant.int 128 %5353 = torch.prim.ListConstruct %416, %int32_6224, %int2_6225, %int32_6226, %int8_6227, %int128_6228 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5354 = torch.aten.view %5186, %5353 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5354, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_6229 = torch.constant.int 32 %5355 = torch.aten.mul.int %416, %int32_6229 : !torch.int, !torch.int -> !torch.int %int2_6230 = torch.constant.int 2 %5356 = torch.aten.mul.int %5355, %int2_6230 : !torch.int, !torch.int -> !torch.int %int32_6231 = torch.constant.int 32 %int8_6232 = torch.constant.int 8 %int128_6233 = torch.constant.int 128 %5357 = torch.prim.ListConstruct %5356, %int32_6231, %int8_6232, %int128_6233 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5358 = torch.aten.view %5354, %5357 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5358, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %5359 = torch.prim.ListConstruct %5352 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_6234 = torch.constant.bool false %5360 = torch.aten.index_put %5358, %5359, %5349, %false_6234 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5360, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_6235 = torch.constant.int 32 %int2_6236 = torch.constant.int 2 %int32_6237 = torch.constant.int 32 %int8_6238 = torch.constant.int 8 %int128_6239 = torch.constant.int 128 %5361 = torch.prim.ListConstruct %416, %int32_6235, %int2_6236, %int32_6237, %int8_6238, %int128_6239 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5362 = torch.aten.view %5360, %5361 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5362, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_6240 = torch.constant.int 2097152 %5363 = torch.prim.ListConstruct %416, %int2097152_6240 : (!torch.int, !torch.int) -> !torch.list<int> %5364 = torch.aten.view %5362, %5363 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5364, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_6241 = torch.constant.int 32 %int2_6242 = torch.constant.int 2 %int32_6243 = torch.constant.int 32 %int8_6244 = torch.constant.int 8 %int128_6245 = torch.constant.int 128 %5365 = torch.prim.ListConstruct %416, %int32_6241, %int2_6242, %int32_6243, %int8_6244, %int128_6245 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5366 = torch.aten.view %5364, %5365 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5366, [%293], affine_map<()[s0] -> (s0, 32, 2, 
32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_6246 = torch.constant.int 32 %int8_6247 = torch.constant.int 8 %int128_6248 = torch.constant.int 128 %5367 = torch.prim.ListConstruct %5356, %int32_6246, %int8_6247, %int128_6248 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5368 = torch.aten.view %5366, %5367 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5368, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_6249 = torch.constant.int 4 %int32_6250 = torch.constant.int 32 %int8_6251 = torch.constant.int 8 %int128_6252 = torch.constant.int 128 %5369 = torch.prim.ListConstruct %int4_6249, %425, %int32_6250, %int8_6251, %int128_6252 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5370 = torch.aten.view %5284, %5369 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5370, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_6253 = torch.constant.int 4 %5371 = torch.aten.mul.int %int4_6253, %425 : !torch.int, !torch.int -> !torch.int %int32_6254 = torch.constant.int 32 %int8_6255 = torch.constant.int 8 %int128_6256 = torch.constant.int 128 %5372 = torch.prim.ListConstruct %5371, %int32_6254, %int8_6255, %int128_6256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5373 = torch.aten.view %5370, %5372 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5373, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_6257 = torch.constant.int 1 %int1_6258 = torch.constant.int 1 %5374 = torch.aten.add.Scalar %5344, %int1_6257, %int1_6258 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5374, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_6259 = torch.constant.int 4 %5375 = torch.aten.mul.int %int4_6259, %425 : !torch.int, !torch.int -> !torch.int %5376 = torch.prim.ListConstruct %5375 : (!torch.int) -> !torch.list<int> %5377 = torch.aten.view %5374, %5376 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %5377, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %5378 = torch.prim.ListConstruct %5377 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_6260 = torch.constant.bool false %5379 = torch.aten.index_put %5368, %5378, %5373, %false_6260 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5379, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_6261 = torch.constant.int 32 %int2_6262 = torch.constant.int 2 %int32_6263 = torch.constant.int 32 %int8_6264 = torch.constant.int 8 %int128_6265 = torch.constant.int 128 %5380 = torch.prim.ListConstruct %416, %int32_6261, %int2_6262, %int32_6263, %int8_6264, %int128_6265 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5381 = torch.aten.view %5379, %5380 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5381, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : 
!torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_6266 = torch.constant.int 2097152 %5382 = torch.prim.ListConstruct %416, %int2097152_6266 : (!torch.int, !torch.int) -> !torch.list<int> %5383 = torch.aten.view %5381, %5382 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5383, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_6267 = torch.constant.int -2 %5384 = torch.aten.unsqueeze %5342, %int-2_6267 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5384, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_6268 = torch.constant.int 4 %int8_6269 = torch.constant.int 8 %int4_6270 = torch.constant.int 4 %int128_6271 = torch.constant.int 128 %5385 = torch.prim.ListConstruct %int4_6268, %5328, %int8_6269, %int4_6270, %int128_6271 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_6272 = torch.constant.bool false %5386 = torch.aten.expand %5384, %5385, %false_6272 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5386, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_6273 = torch.constant.int 0 %5387 = torch.aten.clone %5386, %int0_6273 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5387, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_6274 = torch.constant.int 4 %int32_6275 = torch.constant.int 32 %int128_6276 = torch.constant.int 128 %5388 = torch.prim.ListConstruct %int4_6274, %5328, %int32_6275, %int128_6276 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5389 = torch.aten._unsafe_view %5387, %5388 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5389, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_6277 = torch.constant.int -2 %5390 = torch.aten.unsqueeze %5284, %int-2_6277 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5390, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_6278 = torch.constant.int 1 %5391 = torch.aten.size.int %5278, %int1_6278 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_6279 = torch.constant.int 4 %int8_6280 = torch.constant.int 8 %int4_6281 = torch.constant.int 4 %int128_6282 = torch.constant.int 128 %5392 = torch.prim.ListConstruct %int4_6279, %5391, %int8_6280, %int4_6281, %int128_6282 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_6283 = torch.constant.bool false %5393 = torch.aten.expand %5390, %5392, %false_6283 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5393, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_6284 = torch.constant.int 0 %5394 = torch.aten.clone %5393, %int0_6284 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5394, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_6285 = torch.constant.int 4 %int32_6286 = 
torch.constant.int 32 %int128_6287 = torch.constant.int 128 %5395 = torch.prim.ListConstruct %int4_6285, %5391, %int32_6286, %int128_6287 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5396 = torch.aten._unsafe_view %5394, %5395 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5396, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_6288 = torch.constant.int 1 %int2_6289 = torch.constant.int 2 %5397 = torch.aten.transpose.int %5313, %int1_6288, %int2_6289 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5397, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6290 = torch.constant.int 1 %int2_6291 = torch.constant.int 2 %5398 = torch.aten.transpose.int %5389, %int1_6290, %int2_6291 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5398, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6292 = torch.constant.int 1 %int2_6293 = torch.constant.int 2 %5399 = torch.aten.transpose.int %5396, %int1_6292, %int2_6293 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5399, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_6294 = torch.constant.float 0.000000e+00 %false_6295 = torch.constant.bool false %none_6296 = torch.constant.none %5400:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5397, %5398, %5399, %float0.000000e00_6294, %false_6295, %320, %none_6296) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %5400#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6297 = torch.constant.int 1 %int2_6298 = torch.constant.int 2 %5401 = torch.aten.transpose.int %5400#0, %int1_6297, %int2_6298 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5401, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_6299 = torch.constant.int 4 %int4096_6300 = torch.constant.int 4096 %5402 = torch.prim.ListConstruct %int4_6299, %5299, %int4096_6300 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5403 = torch.aten.view %5401, %5402 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5403, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6301 = torch.constant.int -2 %int-1_6302 = torch.constant.int -1 %5404 = torch.aten.transpose.int %230, %int-2_6301, %int-1_6302 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_6303 = torch.constant.int 4 %5405 = torch.aten.mul.int %int4_6303, %5299 : !torch.int, !torch.int -> !torch.int %int4096_6304 = torch.constant.int 4096 %5406 = torch.prim.ListConstruct %5405, %int4096_6304 : (!torch.int, !torch.int) -> !torch.list<int> %5407 = torch.aten.view %5403, %5406 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> 
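// Between RoPE and attention, the new K/V pages are written into the shared cache and
// the 8 KV heads are broadcast to the 32 query heads. The view constants are consistent
// with a paged layout of [page, layer(32), K/V(2), token(32), kv_head(8), head_dim(128)]
// (32*2*32*8*128 = 2097152), so a flattened row index of page*64 + 2*layer selects this
// layer's K plane (the literal 50 above would be layer 25) and +1 its V plane; that
// reading of the constants is ours. A sketch under those assumptions:
```python
import torch
import torch.nn.functional as F

def write_kv(cache: torch.Tensor, page_ids: torch.Tensor,
             k: torch.Tensor, v: torch.Tensor, layer: int) -> None:
    # cache: [n_pages, 2097152] f16; page_ids: [batch, pages_used] si64, as in %arg2
    flat = cache.view(-1, 32, 8, 128)            # rows = (page, layer, k/v) flattened
    rows = (page_ids * 64 + 2 * layer).reshape(-1)
    flat[rows] = k.reshape(-1, 32, 8, 128)       # aten.index_put, K plane
    flat[rows + 1] = v.reshape(-1, 32, 8, 128)   # aten.index_put, V plane (+1)

def repeat_kv(x: torch.Tensor, n_rep: int = 4) -> torch.Tensor:
    # [B, S, 8, 128] -> [B, S, 32, 128]: the unsqueeze/expand/clone/_unsafe_view chain
    b, s, h, d = x.shape
    return x.unsqueeze(-2).expand(b, s, h, n_rep, d).reshape(b, s, h * n_rep, d)

def attention(q, k, v, mask):
    # transpose to [B, H, S, D] (aten.transpose.int 1<->2), flash attention with the
    # [4,1,S,S] f16 mask (%320), dropout 0.0, is_causal=False, then transpose back
    out = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2),
                                         v.transpose(1, 2), attn_mask=mask)
    return out.transpose(1, 2)
```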
torch.bind_symbolic_shape %5407, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5408 = torch.aten.mm %5407, %5404 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5408, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6305 = torch.constant.int 4 %int4096_6306 = torch.constant.int 4096 %5409 = torch.prim.ListConstruct %int4_6305, %5299, %int4096_6306 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5410 = torch.aten.view %5408, %5409 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5410, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_6307 = torch.constant.int 1 %5411 = torch.aten.add.Tensor %5248, %5410, %int1_6307 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5411, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_6308 = torch.constant.int 6 %5412 = torch.prims.convert_element_type %5411, %int6_6308 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5412, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_6309 = torch.constant.int 2 %5413 = torch.aten.pow.Tensor_Scalar %5412, %int2_6309 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5413, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_6310 = torch.constant.int -1 %5414 = torch.prim.ListConstruct %int-1_6310 : (!torch.int) -> !torch.list<int> %true_6311 = torch.constant.bool true %none_6312 = torch.constant.none %5415 = torch.aten.mean.dim %5413, %5414, %true_6311, %none_6312 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5415, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_6313 = torch.constant.float 9.9999997473787516E-6 %int1_6314 = torch.constant.int 1 %5416 = torch.aten.add.Scalar %5415, %float9.999990e-06_6313, %int1_6314 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5416, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5417 = torch.aten.rsqrt %5416 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5417, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5418 = torch.aten.mul.Tensor %5412, %5417 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5418, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5419 = torch.aten.mul.Tensor %231, %5418 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5419, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_6315 = torch.constant.int 5 %5420 = torch.prims.convert_element_type %5419, %int5_6315 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5420, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6316 = torch.constant.int -2 %int-1_6317 
= torch.constant.int -1 %5421 = torch.aten.transpose.int %232, %int-2_6316, %int-1_6317 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_6318 = torch.constant.int 4 %5422 = torch.aten.mul.int %int4_6318, %294 : !torch.int, !torch.int -> !torch.int %int4096_6319 = torch.constant.int 4096 %5423 = torch.prim.ListConstruct %5422, %int4096_6319 : (!torch.int, !torch.int) -> !torch.list<int> %5424 = torch.aten.view %5420, %5423 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5424, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5425 = torch.aten.mm %5424, %5421 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5425, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_6320 = torch.constant.int 4 %int14336_6321 = torch.constant.int 14336 %5426 = torch.prim.ListConstruct %int4_6320, %294, %int14336_6321 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5427 = torch.aten.view %5425, %5426 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5427, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5428 = torch.aten.silu %5427 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5428, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_6322 = torch.constant.int -2 %int-1_6323 = torch.constant.int -1 %5429 = torch.aten.transpose.int %233, %int-2_6322, %int-1_6323 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_6324 = torch.constant.int 4 %5430 = torch.aten.mul.int %int4_6324, %294 : !torch.int, !torch.int -> !torch.int %int4096_6325 = torch.constant.int 4096 %5431 = torch.prim.ListConstruct %5430, %int4096_6325 : (!torch.int, !torch.int) -> !torch.list<int> %5432 = torch.aten.view %5420, %5431 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5432, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5433 = torch.aten.mm %5432, %5429 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5433, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_6326 = torch.constant.int 4 %int14336_6327 = torch.constant.int 14336 %5434 = torch.prim.ListConstruct %int4_6326, %294, %int14336_6327 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5435 = torch.aten.view %5433, %5434 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5435, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5436 = torch.aten.mul.Tensor %5428, %5435 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5436, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_6328 = torch.constant.int -2 %int-1_6329 = torch.constant.int -1 %5437 = torch.aten.transpose.int %234, %int-2_6328, %int-1_6329 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_6330 = torch.constant.int 1 %5438 = torch.aten.size.int %5427, %int1_6330 : 
!torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_6331 = torch.constant.int 4 %5439 = torch.aten.mul.int %int4_6331, %5438 : !torch.int, !torch.int -> !torch.int %int14336_6332 = torch.constant.int 14336 %5440 = torch.prim.ListConstruct %5439, %int14336_6332 : (!torch.int, !torch.int) -> !torch.list<int> %5441 = torch.aten.view %5436, %5440 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5441, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %5442 = torch.aten.mm %5441, %5437 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5442, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6333 = torch.constant.int 4 %int4096_6334 = torch.constant.int 4096 %5443 = torch.prim.ListConstruct %int4_6333, %5438, %int4096_6334 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5444 = torch.aten.view %5442, %5443 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5444, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_6335 = torch.constant.int 1 %5445 = torch.aten.add.Tensor %5411, %5444, %int1_6335 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5445, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_6336 = torch.constant.int 6 %5446 = torch.prims.convert_element_type %5445, %int6_6336 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5446, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_6337 = torch.constant.int 2 %5447 = torch.aten.pow.Tensor_Scalar %5446, %int2_6337 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5447, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_6338 = torch.constant.int -1 %5448 = torch.prim.ListConstruct %int-1_6338 : (!torch.int) -> !torch.list<int> %true_6339 = torch.constant.bool true %none_6340 = torch.constant.none %5449 = torch.aten.mean.dim %5447, %5448, %true_6339, %none_6340 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5449, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_6341 = torch.constant.float 9.9999997473787516E-6 %int1_6342 = torch.constant.int 1 %5450 = torch.aten.add.Scalar %5449, %float9.999990e-06_6341, %int1_6342 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5450, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5451 = torch.aten.rsqrt %5450 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5451, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5452 = torch.aten.mul.Tensor %5446, %5451 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5452, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5453 = torch.aten.mul.Tensor %235, %5452 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> 
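// Each FFN in this stream is the same SwiGLU pattern: the normalized activations are
// flattened to [batch*seq, 4096], pushed through the gate and up projections (14336
// wide), combined as silu(gate) * up, and projected back down before the residual add
// (%5445). A compact equivalent, with weights kept [out, in] as in the globals and
// transposed before each mm, exactly as the IR does:
```python
import torch
import torch.nn.functional as F

def swiglu_ffn(x: torch.Tensor, w_gate: torch.Tensor, w_up: torch.Tensor,
               w_down: torch.Tensor) -> torch.Tensor:
    # x: [B, S, 4096] f16; w_gate/w_up: [14336, 4096]; w_down: [4096, 14336]
    b, s, d = x.shape
    flat = x.reshape(b * s, d)                  # aten.view to [?, 4096]
    gate = F.silu(flat @ w_gate.T)              # aten.mm + aten.silu
    up = flat @ w_up.T                          # aten.mm
    return ((gate * up) @ w_down.T).reshape(b, s, d)  # aten.mul + aten.mm + view back
```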
torch.bind_symbolic_shape %5453, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_6343 = torch.constant.int 5 %5454 = torch.prims.convert_element_type %5453, %int5_6343 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5454, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6344 = torch.constant.int -2 %int-1_6345 = torch.constant.int -1 %5455 = torch.aten.transpose.int %236, %int-2_6344, %int-1_6345 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_6346 = torch.constant.int 4 %5456 = torch.aten.mul.int %int4_6346, %294 : !torch.int, !torch.int -> !torch.int %int4096_6347 = torch.constant.int 4096 %5457 = torch.prim.ListConstruct %5456, %int4096_6347 : (!torch.int, !torch.int) -> !torch.list<int> %5458 = torch.aten.view %5454, %5457 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5458, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5459 = torch.aten.mm %5458, %5455 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5459, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6348 = torch.constant.int 4 %int4096_6349 = torch.constant.int 4096 %5460 = torch.prim.ListConstruct %int4_6348, %294, %int4096_6349 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5461 = torch.aten.view %5459, %5460 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5461, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6350 = torch.constant.int -2 %int-1_6351 = torch.constant.int -1 %5462 = torch.aten.transpose.int %237, %int-2_6350, %int-1_6351 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_6352 = torch.constant.int 4 %5463 = torch.aten.mul.int %int4_6352, %294 : !torch.int, !torch.int -> !torch.int %int4096_6353 = torch.constant.int 4096 %5464 = torch.prim.ListConstruct %5463, %int4096_6353 : (!torch.int, !torch.int) -> !torch.list<int> %5465 = torch.aten.view %5454, %5464 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5465, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5466 = torch.aten.mm %5465, %5462 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5466, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_6354 = torch.constant.int 4 %int1024_6355 = torch.constant.int 1024 %5467 = torch.prim.ListConstruct %int4_6354, %294, %int1024_6355 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5468 = torch.aten.view %5466, %5467 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %5468, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_6356 = torch.constant.int -2 %int-1_6357 = torch.constant.int -1 %5469 = torch.aten.transpose.int %238, %int-2_6356, %int-1_6357 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_6358 = torch.constant.int 4 %5470 = torch.aten.mul.int %int4_6358, %294 : !torch.int, !torch.int -> !torch.int %int4096_6359 = 
torch.constant.int 4096 %5471 = torch.prim.ListConstruct %5470, %int4096_6359 : (!torch.int, !torch.int) -> !torch.list<int> %5472 = torch.aten.view %5454, %5471 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5472, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5473 = torch.aten.mm %5472, %5469 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5473, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_6360 = torch.constant.int 4 %int1024_6361 = torch.constant.int 1024 %5474 = torch.prim.ListConstruct %int4_6360, %294, %int1024_6361 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5475 = torch.aten.view %5473, %5474 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %5475, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_6362 = torch.constant.int 4 %int32_6363 = torch.constant.int 32 %int128_6364 = torch.constant.int 128 %5476 = torch.prim.ListConstruct %int4_6362, %294, %int32_6363, %int128_6364 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5477 = torch.aten.view %5461, %5476 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5477, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_6365 = torch.constant.int 4 %int8_6366 = torch.constant.int 8 %int128_6367 = torch.constant.int 128 %5478 = torch.prim.ListConstruct %int4_6365, %294, %int8_6366, %int128_6367 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5479 = torch.aten.view %5468, %5478 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5479, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_6368 = torch.constant.int 4 %int8_6369 = torch.constant.int 8 %int128_6370 = torch.constant.int 128 %5480 = torch.prim.ListConstruct %int4_6368, %294, %int8_6369, %int128_6370 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5481 = torch.aten.view %5475, %5480 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5481, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_6371 = torch.constant.int 131072 %none_6372 = torch.constant.none %none_6373 = torch.constant.none %cpu_6374 = torch.constant.device "cpu" %false_6375 = torch.constant.bool false %5482 = torch.aten.arange %int131072_6371, %none_6372, %none_6373, %cpu_6374, %false_6375 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_6376 = torch.constant.int 0 %int128_6377 = torch.constant.int 128 %int2_6378 = torch.constant.int 2 %none_6379 = torch.constant.none %none_6380 = torch.constant.none %cpu_6381 = torch.constant.device "cpu" %false_6382 = torch.constant.bool false %5483 = torch.aten.arange.start_step %int0_6376, %int128_6377, %int2_6378, %none_6379, %none_6380, %cpu_6381, %false_6382 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_6383 = torch.constant.int 0 %int0_6384 = torch.constant.int 0 %int64_6385 = torch.constant.int 64 %int1_6386 = torch.constant.int 1 %5484 = 
torch.aten.slice.Tensor %5483, %int0_6383, %int0_6384, %int64_6385, %int1_6386 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_6387 = torch.constant.int 6 %5485 = torch.prims.convert_element_type %5484, %int6_6387 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_6388 = torch.constant.int 128 %5486 = torch.aten.div.Scalar %5485, %int128_6388 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_6389 = torch.constant.float 5.000000e+05 %5487 = torch.aten.pow.Scalar %float5.000000e05_6389, %5486 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5488 = torch.aten.reciprocal %5487 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_6390 = torch.constant.float 1.000000e+00 %5489 = torch.aten.mul.Scalar %5488, %float1.000000e00_6390 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_6391 = torch.constant.int 131072 %int1_6392 = torch.constant.int 1 %5490 = torch.prim.ListConstruct %int131072_6391, %int1_6392 : (!torch.int, !torch.int) -> !torch.list<int> %5491 = torch.aten.view %5482, %5490 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5492 = torch.aten.mul.Tensor %5491, %5489 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5493 = torch.aten.cos %5492 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5494 = torch.aten.sin %5492 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5495 = torch.aten.complex %5493, %5494 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_6393 = torch.constant.int 1 %5496 = torch.aten.size.int %5461, %int1_6393 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_6394 = torch.constant.int 0 %5497 = torch.aten.add.int %int0_6394, %5496 : !torch.int, !torch.int -> !torch.int %int0_6395 = torch.constant.int 0 %int0_6396 = torch.constant.int 0 %int1_6397 = torch.constant.int 1 %5498 = torch.aten.slice.Tensor %5495, %int0_6395, %int0_6396, %5497, %int1_6397 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5498, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_6398 = torch.constant.int 1 %int0_6399 = torch.constant.int 0 %int9223372036854775807_6400 = torch.constant.int 9223372036854775807 %int1_6401 = torch.constant.int 1 %5499 = torch.aten.slice.Tensor %5498, %int1_6398, %int0_6399, %int9223372036854775807_6400, %int1_6401 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5499, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_6402 = torch.constant.int 0 %5500 = torch.aten.unsqueeze %5499, %int0_6402 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5500, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_6403 = torch.constant.int 2 %5501 = torch.aten.unsqueeze %5500, %int2_6403 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5501, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> 
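// %5495 is the RoPE table: inv_freq = 1 / 500000^(2i/128) for i in [0,64), multiplied by
// positions 0..131071 and combined as cos + i*sin. It is sliced to the current sequence
// length and reshaped to [1,?,1,64] so it broadcasts over batch and heads. Next, Q
// ([4,?,32,128] f16) is bitcast to complex<f16> pairs, rotated by an elementwise complex
// multiply against this table, bitcast back to [4,?,32,128] f32, and truncated to f16.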
%int3_6404 = torch.constant.int 3 %int0_6405 = torch.constant.int 0 %int9223372036854775807_6406 = torch.constant.int 9223372036854775807 %int1_6407 = torch.constant.int 1 %5502 = torch.aten.slice.Tensor %5501, %int3_6404, %int0_6405, %int9223372036854775807_6406, %int1_6407 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5502, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5503 = torch_c.to_builtin_tensor %5477 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_6408 = arith.constant 1 : index %dim_6409 = tensor.dim %5503, %c1_6408 : tensor<4x?x32x128xf16> %5504 = flow.tensor.bitcast %5503 : tensor<4x?x32x128xf16>{%dim_6409} -> tensor<4x?x32x64xcomplex<f16>>{%dim_6409} %5505 = torch_c.from_builtin_tensor %5504 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %5505, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %5506 = torch.aten.mul.Tensor %5505, %5502 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %5506, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %5507 = torch_c.to_builtin_tensor %5506 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_6410 = arith.constant 1 : index %dim_6411 = tensor.dim %5507, %c1_6410 : tensor<4x?x32x64xcomplex<f32>> %5508 = flow.tensor.bitcast %5507 : tensor<4x?x32x64xcomplex<f32>>{%dim_6411} -> tensor<4x?x32x128xf32>{%dim_6411} %5509 = torch_c.from_builtin_tensor %5508 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %5509, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_6412 = torch.constant.int 5 %5510 = torch.prims.convert_element_type %5509, %int5_6412 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5510, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_6413 = torch.constant.int 131072 %none_6414 = torch.constant.none %none_6415 = torch.constant.none %cpu_6416 = torch.constant.device "cpu" %false_6417 = torch.constant.bool false %5511 = torch.aten.arange %int131072_6413, %none_6414, %none_6415, %cpu_6416, %false_6417 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_6418 = torch.constant.int 0 %int128_6419 = torch.constant.int 128 %int2_6420 = torch.constant.int 2 %none_6421 = torch.constant.none %none_6422 = torch.constant.none %cpu_6423 = torch.constant.device "cpu" %false_6424 = torch.constant.bool false %5512 = torch.aten.arange.start_step %int0_6418, %int128_6419, %int2_6420, %none_6421, %none_6422, %cpu_6423, %false_6424 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_6425 = torch.constant.int 0 %int0_6426 = torch.constant.int 0 %int64_6427 = torch.constant.int 64 %int1_6428 = torch.constant.int 1 %5513 = torch.aten.slice.Tensor %5512, %int0_6425, %int0_6426, %int64_6427, %int1_6428 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_6429 = torch.constant.int 6 %5514 = torch.prims.convert_element_type %5513, %int6_6429 : 
!torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_6430 = torch.constant.int 128 %5515 = torch.aten.div.Scalar %5514, %int128_6430 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_6431 = torch.constant.float 5.000000e+05 %5516 = torch.aten.pow.Scalar %float5.000000e05_6431, %5515 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5517 = torch.aten.reciprocal %5516 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_6432 = torch.constant.float 1.000000e+00 %5518 = torch.aten.mul.Scalar %5517, %float1.000000e00_6432 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_6433 = torch.constant.int 131072 %int1_6434 = torch.constant.int 1 %5519 = torch.prim.ListConstruct %int131072_6433, %int1_6434 : (!torch.int, !torch.int) -> !torch.list<int> %5520 = torch.aten.view %5511, %5519 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5521 = torch.aten.mul.Tensor %5520, %5518 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5522 = torch.aten.cos %5521 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5523 = torch.aten.sin %5521 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5524 = torch.aten.complex %5522, %5523 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_6435 = torch.constant.int 1 %5525 = torch.aten.size.int %5468, %int1_6435 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_6436 = torch.constant.int 0 %5526 = torch.aten.add.int %int0_6436, %5525 : !torch.int, !torch.int -> !torch.int %int0_6437 = torch.constant.int 0 %int0_6438 = torch.constant.int 0 %int1_6439 = torch.constant.int 1 %5527 = torch.aten.slice.Tensor %5524, %int0_6437, %int0_6438, %5526, %int1_6439 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5527, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_6440 = torch.constant.int 1 %int0_6441 = torch.constant.int 0 %int9223372036854775807_6442 = torch.constant.int 9223372036854775807 %int1_6443 = torch.constant.int 1 %5528 = torch.aten.slice.Tensor %5527, %int1_6440, %int0_6441, %int9223372036854775807_6442, %int1_6443 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5528, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_6444 = torch.constant.int 0 %5529 = torch.aten.unsqueeze %5528, %int0_6444 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5529, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_6445 = torch.constant.int 2 %5530 = torch.aten.unsqueeze %5529, %int2_6445 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5530, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_6446 = torch.constant.int 3 %int0_6447 = torch.constant.int 0 %int9223372036854775807_6448 = torch.constant.int 9223372036854775807 %int1_6449 = torch.constant.int 1 %5531 = torch.aten.slice.Tensor %5530, %int3_6446, %int0_6447, %int9223372036854775807_6448, %int1_6449 : 
!torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5531, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5532 = torch_c.to_builtin_tensor %5479 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_6450 = arith.constant 1 : index %dim_6451 = tensor.dim %5532, %c1_6450 : tensor<4x?x8x128xf16> %5533 = flow.tensor.bitcast %5532 : tensor<4x?x8x128xf16>{%dim_6451} -> tensor<4x?x8x64xcomplex<f16>>{%dim_6451} %5534 = torch_c.from_builtin_tensor %5533 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %5534, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %5535 = torch.aten.mul.Tensor %5534, %5531 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %5535, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %5536 = torch_c.to_builtin_tensor %5535 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_6452 = arith.constant 1 : index %dim_6453 = tensor.dim %5536, %c1_6452 : tensor<4x?x8x64xcomplex<f32>> %5537 = flow.tensor.bitcast %5536 : tensor<4x?x8x64xcomplex<f32>>{%dim_6453} -> tensor<4x?x8x128xf32>{%dim_6453} %5538 = torch_c.from_builtin_tensor %5537 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %5538, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_6454 = torch.constant.int 5 %5539 = torch.prims.convert_element_type %5538, %int5_6454 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5539, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_6455 = torch.constant.int 64 %5540 = torch.aten.mul.Scalar %arg2, %int64_6455 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5540, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int52 = torch.constant.int 52 %int1_6456 = torch.constant.int 1 %5541 = torch.aten.add.Scalar %5540, %int52, %int1_6456 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5541, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_6457 = torch.constant.int 4 %int32_6458 = torch.constant.int 32 %int8_6459 = torch.constant.int 8 %int128_6460 = torch.constant.int 128 %5542 = torch.prim.ListConstruct %int4_6457, %425, %int32_6458, %int8_6459, %int128_6460 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5543 = torch.aten.view %5539, %5542 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5543, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_6461 = torch.constant.int 4 %5544 = torch.aten.mul.int %int4_6461, %425 : !torch.int, !torch.int -> !torch.int %int32_6462 = torch.constant.int 32 %int8_6463 = torch.constant.int 8 %int128_6464 = torch.constant.int 128 %5545 = torch.prim.ListConstruct %5544, %int32_6462, %int8_6463, %int128_6464 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5546 = torch.aten.view %5543, %5545 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> 
!torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5546, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_6465 = torch.constant.int 4 %5547 = torch.aten.mul.int %int4_6465, %425 : !torch.int, !torch.int -> !torch.int %5548 = torch.prim.ListConstruct %5547 : (!torch.int) -> !torch.list<int> %5549 = torch.aten.view %5541, %5548 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %5549, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_6466 = torch.constant.int 32 %int2_6467 = torch.constant.int 2 %int32_6468 = torch.constant.int 32 %int8_6469 = torch.constant.int 8 %int128_6470 = torch.constant.int 128 %5550 = torch.prim.ListConstruct %416, %int32_6466, %int2_6467, %int32_6468, %int8_6469, %int128_6470 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5551 = torch.aten.view %5383, %5550 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5551, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_6471 = torch.constant.int 32 %5552 = torch.aten.mul.int %416, %int32_6471 : !torch.int, !torch.int -> !torch.int %int2_6472 = torch.constant.int 2 %5553 = torch.aten.mul.int %5552, %int2_6472 : !torch.int, !torch.int -> !torch.int %int32_6473 = torch.constant.int 32 %int8_6474 = torch.constant.int 8 %int128_6475 = torch.constant.int 128 %5554 = torch.prim.ListConstruct %5553, %int32_6473, %int8_6474, %int128_6475 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5555 = torch.aten.view %5551, %5554 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5555, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %5556 = torch.prim.ListConstruct %5549 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_6476 = torch.constant.bool false %5557 = torch.aten.index_put %5555, %5556, %5546, %false_6476 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5557, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_6477 = torch.constant.int 32 %int2_6478 = torch.constant.int 2 %int32_6479 = torch.constant.int 32 %int8_6480 = torch.constant.int 8 %int128_6481 = torch.constant.int 128 %5558 = torch.prim.ListConstruct %416, %int32_6477, %int2_6478, %int32_6479, %int8_6480, %int128_6481 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5559 = torch.aten.view %5557, %5558 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5559, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_6482 = torch.constant.int 2097152 %5560 = torch.prim.ListConstruct %416, %int2097152_6482 : (!torch.int, !torch.int) -> !torch.list<int> %5561 = torch.aten.view %5559, %5560 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5561, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_6483 = torch.constant.int 32 %int2_6484 = torch.constant.int 2 %int32_6485 = torch.constant.int 32 %int8_6486 = 
torch.constant.int 8 %int128_6487 = torch.constant.int 128 %5562 = torch.prim.ListConstruct %416, %int32_6483, %int2_6484, %int32_6485, %int8_6486, %int128_6487 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5563 = torch.aten.view %5561, %5562 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5563, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_6488 = torch.constant.int 32 %int8_6489 = torch.constant.int 8 %int128_6490 = torch.constant.int 128 %5564 = torch.prim.ListConstruct %5553, %int32_6488, %int8_6489, %int128_6490 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5565 = torch.aten.view %5563, %5564 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5565, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_6491 = torch.constant.int 4 %int32_6492 = torch.constant.int 32 %int8_6493 = torch.constant.int 8 %int128_6494 = torch.constant.int 128 %5566 = torch.prim.ListConstruct %int4_6491, %425, %int32_6492, %int8_6493, %int128_6494 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5567 = torch.aten.view %5481, %5566 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5567, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_6495 = torch.constant.int 4 %5568 = torch.aten.mul.int %int4_6495, %425 : !torch.int, !torch.int -> !torch.int %int32_6496 = torch.constant.int 32 %int8_6497 = torch.constant.int 8 %int128_6498 = torch.constant.int 128 %5569 = torch.prim.ListConstruct %5568, %int32_6496, %int8_6497, %int128_6498 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5570 = torch.aten.view %5567, %5569 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5570, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_6499 = torch.constant.int 1 %int1_6500 = torch.constant.int 1 %5571 = torch.aten.add.Scalar %5541, %int1_6499, %int1_6500 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5571, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_6501 = torch.constant.int 4 %5572 = torch.aten.mul.int %int4_6501, %425 : !torch.int, !torch.int -> !torch.int %5573 = torch.prim.ListConstruct %5572 : (!torch.int) -> !torch.list<int> %5574 = torch.aten.view %5571, %5573 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %5574, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %5575 = torch.prim.ListConstruct %5574 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_6502 = torch.constant.bool false %5576 = torch.aten.index_put %5565, %5575, %5570, %false_6502 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5576, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_6503 = torch.constant.int 32 %int2_6504 = torch.constant.int 2 %int32_6505 = torch.constant.int 32 %int8_6506 = torch.constant.int 8 %int128_6507 = 
torch.constant.int 128 %5577 = torch.prim.ListConstruct %416, %int32_6503, %int2_6504, %int32_6505, %int8_6506, %int128_6507 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5578 = torch.aten.view %5576, %5577 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5578, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_6508 = torch.constant.int 2097152 %5579 = torch.prim.ListConstruct %416, %int2097152_6508 : (!torch.int, !torch.int) -> !torch.list<int> %5580 = torch.aten.view %5578, %5579 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5580, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_6509 = torch.constant.int -2 %5581 = torch.aten.unsqueeze %5539, %int-2_6509 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5581, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_6510 = torch.constant.int 4 %int8_6511 = torch.constant.int 8 %int4_6512 = torch.constant.int 4 %int128_6513 = torch.constant.int 128 %5582 = torch.prim.ListConstruct %int4_6510, %5525, %int8_6511, %int4_6512, %int128_6513 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_6514 = torch.constant.bool false %5583 = torch.aten.expand %5581, %5582, %false_6514 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5583, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_6515 = torch.constant.int 0 %5584 = torch.aten.clone %5583, %int0_6515 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5584, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_6516 = torch.constant.int 4 %int32_6517 = torch.constant.int 32 %int128_6518 = torch.constant.int 128 %5585 = torch.prim.ListConstruct %int4_6516, %5525, %int32_6517, %int128_6518 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5586 = torch.aten._unsafe_view %5584, %5585 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5586, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_6519 = torch.constant.int -2 %5587 = torch.aten.unsqueeze %5481, %int-2_6519 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5587, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_6520 = torch.constant.int 1 %5588 = torch.aten.size.int %5475, %int1_6520 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_6521 = torch.constant.int 4 %int8_6522 = torch.constant.int 8 %int4_6523 = torch.constant.int 4 %int128_6524 = torch.constant.int 128 %5589 = torch.prim.ListConstruct %int4_6521, %5588, %int8_6522, %int4_6523, %int128_6524 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_6525 = torch.constant.bool false %5590 = torch.aten.expand %5587, %5589, %false_6525 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape 
%5590, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_6526 = torch.constant.int 0 %5591 = torch.aten.clone %5590, %int0_6526 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5591, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_6527 = torch.constant.int 4 %int32_6528 = torch.constant.int 32 %int128_6529 = torch.constant.int 128 %5592 = torch.prim.ListConstruct %int4_6527, %5588, %int32_6528, %int128_6529 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5593 = torch.aten._unsafe_view %5591, %5592 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5593, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_6530 = torch.constant.int 1 %int2_6531 = torch.constant.int 2 %5594 = torch.aten.transpose.int %5510, %int1_6530, %int2_6531 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5594, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6532 = torch.constant.int 1 %int2_6533 = torch.constant.int 2 %5595 = torch.aten.transpose.int %5586, %int1_6532, %int2_6533 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5595, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6534 = torch.constant.int 1 %int2_6535 = torch.constant.int 2 %5596 = torch.aten.transpose.int %5593, %int1_6534, %int2_6535 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5596, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_6536 = torch.constant.float 0.000000e+00 %false_6537 = torch.constant.bool false %none_6538 = torch.constant.none %5597:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5594, %5595, %5596, %float0.000000e00_6536, %false_6537, %320, %none_6538) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %5597#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6539 = torch.constant.int 1 %int2_6540 = torch.constant.int 2 %5598 = torch.aten.transpose.int %5597#0, %int1_6539, %int2_6540 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5598, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_6541 = torch.constant.int 4 %int4096_6542 = torch.constant.int 4096 %5599 = torch.prim.ListConstruct %int4_6541, %5496, %int4096_6542 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5600 = torch.aten.view %5598, %5599 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5600, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6543 = torch.constant.int -2 %int-1_6544 = torch.constant.int -1 %5601 = torch.aten.transpose.int %239, %int-2_6543, %int-1_6544 : !torch.vtensor<[4096,4096],f16>, 
!torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_6545 = torch.constant.int 4 %5602 = torch.aten.mul.int %int4_6545, %5496 : !torch.int, !torch.int -> !torch.int %int4096_6546 = torch.constant.int 4096 %5603 = torch.prim.ListConstruct %5602, %int4096_6546 : (!torch.int, !torch.int) -> !torch.list<int> %5604 = torch.aten.view %5600, %5603 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5604, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5605 = torch.aten.mm %5604, %5601 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5605, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6547 = torch.constant.int 4 %int4096_6548 = torch.constant.int 4096 %5606 = torch.prim.ListConstruct %int4_6547, %5496, %int4096_6548 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5607 = torch.aten.view %5605, %5606 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5607, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_6549 = torch.constant.int 1 %5608 = torch.aten.add.Tensor %5445, %5607, %int1_6549 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5608, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_6550 = torch.constant.int 6 %5609 = torch.prims.convert_element_type %5608, %int6_6550 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5609, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_6551 = torch.constant.int 2 %5610 = torch.aten.pow.Tensor_Scalar %5609, %int2_6551 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5610, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_6552 = torch.constant.int -1 %5611 = torch.prim.ListConstruct %int-1_6552 : (!torch.int) -> !torch.list<int> %true_6553 = torch.constant.bool true %none_6554 = torch.constant.none %5612 = torch.aten.mean.dim %5610, %5611, %true_6553, %none_6554 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5612, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_6555 = torch.constant.float 9.9999997473787516E-6 %int1_6556 = torch.constant.int 1 %5613 = torch.aten.add.Scalar %5612, %float9.999990e-06_6555, %int1_6556 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5613, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5614 = torch.aten.rsqrt %5613 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5614, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5615 = torch.aten.mul.Tensor %5609, %5614 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5615, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5616 = torch.aten.mul.Tensor %240, %5615 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> 
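// The attention output has been projected through attn_output.weight and added back to
// the residual stream (%5608); %5616 is the ffn_norm RMSNorm of that sum. The FFN below
// is SwiGLU: silu(x @ ffn_gate^T) * (x @ ffn_up^T), both 4096 -> 14336, followed by the
// ffn_down projection back from 14336 -> 4096 and another residual add.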
torch.bind_symbolic_shape %5616, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_6557 = torch.constant.int 5 %5617 = torch.prims.convert_element_type %5616, %int5_6557 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5617, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6558 = torch.constant.int -2 %int-1_6559 = torch.constant.int -1 %5618 = torch.aten.transpose.int %241, %int-2_6558, %int-1_6559 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_6560 = torch.constant.int 4 %5619 = torch.aten.mul.int %int4_6560, %294 : !torch.int, !torch.int -> !torch.int %int4096_6561 = torch.constant.int 4096 %5620 = torch.prim.ListConstruct %5619, %int4096_6561 : (!torch.int, !torch.int) -> !torch.list<int> %5621 = torch.aten.view %5617, %5620 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5621, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5622 = torch.aten.mm %5621, %5618 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5622, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_6562 = torch.constant.int 4 %int14336_6563 = torch.constant.int 14336 %5623 = torch.prim.ListConstruct %int4_6562, %294, %int14336_6563 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5624 = torch.aten.view %5622, %5623 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5624, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5625 = torch.aten.silu %5624 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5625, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_6564 = torch.constant.int -2 %int-1_6565 = torch.constant.int -1 %5626 = torch.aten.transpose.int %242, %int-2_6564, %int-1_6565 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_6566 = torch.constant.int 4 %5627 = torch.aten.mul.int %int4_6566, %294 : !torch.int, !torch.int -> !torch.int %int4096_6567 = torch.constant.int 4096 %5628 = torch.prim.ListConstruct %5627, %int4096_6567 : (!torch.int, !torch.int) -> !torch.list<int> %5629 = torch.aten.view %5617, %5628 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5629, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5630 = torch.aten.mm %5629, %5626 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5630, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_6568 = torch.constant.int 4 %int14336_6569 = torch.constant.int 14336 %5631 = torch.prim.ListConstruct %int4_6568, %294, %int14336_6569 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5632 = torch.aten.view %5630, %5631 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5632, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5633 = torch.aten.mul.Tensor %5625, %5632 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> 
!torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5633, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_6570 = torch.constant.int -2 %int-1_6571 = torch.constant.int -1 %5634 = torch.aten.transpose.int %243, %int-2_6570, %int-1_6571 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_6572 = torch.constant.int 1 %5635 = torch.aten.size.int %5624, %int1_6572 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_6573 = torch.constant.int 4 %5636 = torch.aten.mul.int %int4_6573, %5635 : !torch.int, !torch.int -> !torch.int %int14336_6574 = torch.constant.int 14336 %5637 = torch.prim.ListConstruct %5636, %int14336_6574 : (!torch.int, !torch.int) -> !torch.list<int> %5638 = torch.aten.view %5633, %5637 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5638, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %5639 = torch.aten.mm %5638, %5634 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5639, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6575 = torch.constant.int 4 %int4096_6576 = torch.constant.int 4096 %5640 = torch.prim.ListConstruct %int4_6575, %5635, %int4096_6576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5641 = torch.aten.view %5639, %5640 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5641, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_6577 = torch.constant.int 1 %5642 = torch.aten.add.Tensor %5608, %5641, %int1_6577 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5642, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_6578 = torch.constant.int 6 %5643 = torch.prims.convert_element_type %5642, %int6_6578 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5643, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_6579 = torch.constant.int 2 %5644 = torch.aten.pow.Tensor_Scalar %5643, %int2_6579 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5644, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_6580 = torch.constant.int -1 %5645 = torch.prim.ListConstruct %int-1_6580 : (!torch.int) -> !torch.list<int> %true_6581 = torch.constant.bool true %none_6582 = torch.constant.none %5646 = torch.aten.mean.dim %5644, %5645, %true_6581, %none_6582 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5646, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_6583 = torch.constant.float 9.9999997473787516E-6 %int1_6584 = torch.constant.int 1 %5647 = torch.aten.add.Scalar %5646, %float9.999990e-06_6583, %int1_6584 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5647, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5648 = torch.aten.rsqrt %5647 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5648, [%292], 
affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5649 = torch.aten.mul.Tensor %5643, %5648 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5649, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5650 = torch.aten.mul.Tensor %244, %5649 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5650, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_6585 = torch.constant.int 5 %5651 = torch.prims.convert_element_type %5650, %int5_6585 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5651, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6586 = torch.constant.int -2 %int-1_6587 = torch.constant.int -1 %5652 = torch.aten.transpose.int %245, %int-2_6586, %int-1_6587 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_6588 = torch.constant.int 4 %5653 = torch.aten.mul.int %int4_6588, %294 : !torch.int, !torch.int -> !torch.int %int4096_6589 = torch.constant.int 4096 %5654 = torch.prim.ListConstruct %5653, %int4096_6589 : (!torch.int, !torch.int) -> !torch.list<int> %5655 = torch.aten.view %5651, %5654 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5655, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5656 = torch.aten.mm %5655, %5652 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5656, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6590 = torch.constant.int 4 %int4096_6591 = torch.constant.int 4096 %5657 = torch.prim.ListConstruct %int4_6590, %294, %int4096_6591 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5658 = torch.aten.view %5656, %5657 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5658, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6592 = torch.constant.int -2 %int-1_6593 = torch.constant.int -1 %5659 = torch.aten.transpose.int %246, %int-2_6592, %int-1_6593 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_6594 = torch.constant.int 4 %5660 = torch.aten.mul.int %int4_6594, %294 : !torch.int, !torch.int -> !torch.int %int4096_6595 = torch.constant.int 4096 %5661 = torch.prim.ListConstruct %5660, %int4096_6595 : (!torch.int, !torch.int) -> !torch.list<int> %5662 = torch.aten.view %5651, %5661 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5662, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5663 = torch.aten.mm %5662, %5659 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5663, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_6596 = torch.constant.int 4 %int1024_6597 = torch.constant.int 1024 %5664 = torch.prim.ListConstruct %int4_6596, %294, %int1024_6597 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5665 = torch.aten.view %5663, %5664 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> 
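// Q (%5658, weight %245) and K (%5665, weight %246) projections for this block are done;
// the V projection (weight %247, 4096 -> 1024) follows the same transpose + flatten + mm +
// view pattern, after which Q/K/V are reshaped into per-head layouts: [4,?,32,128] for Q
// and [4,?,8,128] for K and V.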
torch.bind_symbolic_shape %5665, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_6598 = torch.constant.int -2 %int-1_6599 = torch.constant.int -1 %5666 = torch.aten.transpose.int %247, %int-2_6598, %int-1_6599 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_6600 = torch.constant.int 4 %5667 = torch.aten.mul.int %int4_6600, %294 : !torch.int, !torch.int -> !torch.int %int4096_6601 = torch.constant.int 4096 %5668 = torch.prim.ListConstruct %5667, %int4096_6601 : (!torch.int, !torch.int) -> !torch.list<int> %5669 = torch.aten.view %5651, %5668 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5669, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5670 = torch.aten.mm %5669, %5666 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5670, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_6602 = torch.constant.int 4 %int1024_6603 = torch.constant.int 1024 %5671 = torch.prim.ListConstruct %int4_6602, %294, %int1024_6603 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5672 = torch.aten.view %5670, %5671 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %5672, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_6604 = torch.constant.int 4 %int32_6605 = torch.constant.int 32 %int128_6606 = torch.constant.int 128 %5673 = torch.prim.ListConstruct %int4_6604, %294, %int32_6605, %int128_6606 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5674 = torch.aten.view %5658, %5673 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5674, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_6607 = torch.constant.int 4 %int8_6608 = torch.constant.int 8 %int128_6609 = torch.constant.int 128 %5675 = torch.prim.ListConstruct %int4_6607, %294, %int8_6608, %int128_6609 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5676 = torch.aten.view %5665, %5675 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5676, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_6610 = torch.constant.int 4 %int8_6611 = torch.constant.int 8 %int128_6612 = torch.constant.int 128 %5677 = torch.prim.ListConstruct %int4_6610, %294, %int8_6611, %int128_6612 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5678 = torch.aten.view %5672, %5677 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5678, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_6613 = torch.constant.int 131072 %none_6614 = torch.constant.none %none_6615 = torch.constant.none %cpu_6616 = torch.constant.device "cpu" %false_6617 = torch.constant.bool false %5679 = torch.aten.arange %int131072_6613, %none_6614, %none_6615, %cpu_6616, %false_6617 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_6618 = torch.constant.int 0 %int128_6619 = torch.constant.int 128 %int2_6620 = torch.constant.int 2 %none_6621 = torch.constant.none %none_6622 = torch.constant.none 
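// The RoPE table is rebuilt from the same constants (theta = 5.0e5, head dim 128, positions
// arange(131072) on CPU) before being applied to this block's Q; a second, identical build
// for K appears further down, mirroring the two builds in the previous block above.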
%cpu_6623 = torch.constant.device "cpu" %false_6624 = torch.constant.bool false %5680 = torch.aten.arange.start_step %int0_6618, %int128_6619, %int2_6620, %none_6621, %none_6622, %cpu_6623, %false_6624 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_6625 = torch.constant.int 0 %int0_6626 = torch.constant.int 0 %int64_6627 = torch.constant.int 64 %int1_6628 = torch.constant.int 1 %5681 = torch.aten.slice.Tensor %5680, %int0_6625, %int0_6626, %int64_6627, %int1_6628 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_6629 = torch.constant.int 6 %5682 = torch.prims.convert_element_type %5681, %int6_6629 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_6630 = torch.constant.int 128 %5683 = torch.aten.div.Scalar %5682, %int128_6630 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_6631 = torch.constant.float 5.000000e+05 %5684 = torch.aten.pow.Scalar %float5.000000e05_6631, %5683 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5685 = torch.aten.reciprocal %5684 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_6632 = torch.constant.float 1.000000e+00 %5686 = torch.aten.mul.Scalar %5685, %float1.000000e00_6632 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_6633 = torch.constant.int 131072 %int1_6634 = torch.constant.int 1 %5687 = torch.prim.ListConstruct %int131072_6633, %int1_6634 : (!torch.int, !torch.int) -> !torch.list<int> %5688 = torch.aten.view %5679, %5687 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5689 = torch.aten.mul.Tensor %5688, %5686 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5690 = torch.aten.cos %5689 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5691 = torch.aten.sin %5689 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5692 = torch.aten.complex %5690, %5691 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_6635 = torch.constant.int 1 %5693 = torch.aten.size.int %5658, %int1_6635 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_6636 = torch.constant.int 0 %5694 = torch.aten.add.int %int0_6636, %5693 : !torch.int, !torch.int -> !torch.int %int0_6637 = torch.constant.int 0 %int0_6638 = torch.constant.int 0 %int1_6639 = torch.constant.int 1 %5695 = torch.aten.slice.Tensor %5692, %int0_6637, %int0_6638, %5694, %int1_6639 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5695, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_6640 = torch.constant.int 1 %int0_6641 = torch.constant.int 0 %int9223372036854775807_6642 = torch.constant.int 9223372036854775807 %int1_6643 = torch.constant.int 1 %5696 = torch.aten.slice.Tensor %5695, %int1_6640, %int0_6641, %int9223372036854775807_6642, %int1_6643 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5696, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_6644 = torch.constant.int 0 %5697 = torch.aten.unsqueeze %5696, %int0_6644 : !torch.vtensor<[?,64],complex<f32>>, 
!torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5697, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_6645 = torch.constant.int 2 %5698 = torch.aten.unsqueeze %5697, %int2_6645 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5698, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_6646 = torch.constant.int 3 %int0_6647 = torch.constant.int 0 %int9223372036854775807_6648 = torch.constant.int 9223372036854775807 %int1_6649 = torch.constant.int 1 %5699 = torch.aten.slice.Tensor %5698, %int3_6646, %int0_6647, %int9223372036854775807_6648, %int1_6649 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5699, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5700 = torch_c.to_builtin_tensor %5674 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_6650 = arith.constant 1 : index %dim_6651 = tensor.dim %5700, %c1_6650 : tensor<4x?x32x128xf16> %5701 = flow.tensor.bitcast %5700 : tensor<4x?x32x128xf16>{%dim_6651} -> tensor<4x?x32x64xcomplex<f16>>{%dim_6651} %5702 = torch_c.from_builtin_tensor %5701 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %5702, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %5703 = torch.aten.mul.Tensor %5702, %5699 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %5703, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %5704 = torch_c.to_builtin_tensor %5703 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_6652 = arith.constant 1 : index %dim_6653 = tensor.dim %5704, %c1_6652 : tensor<4x?x32x64xcomplex<f32>> %5705 = flow.tensor.bitcast %5704 : tensor<4x?x32x64xcomplex<f32>>{%dim_6653} -> tensor<4x?x32x128xf32>{%dim_6653} %5706 = torch_c.from_builtin_tensor %5705 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %5706, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_6654 = torch.constant.int 5 %5707 = torch.prims.convert_element_type %5706, %int5_6654 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5707, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_6655 = torch.constant.int 131072 %none_6656 = torch.constant.none %none_6657 = torch.constant.none %cpu_6658 = torch.constant.device "cpu" %false_6659 = torch.constant.bool false %5708 = torch.aten.arange %int131072_6655, %none_6656, %none_6657, %cpu_6658, %false_6659 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_6660 = torch.constant.int 0 %int128_6661 = torch.constant.int 128 %int2_6662 = torch.constant.int 2 %none_6663 = torch.constant.none %none_6664 = torch.constant.none %cpu_6665 = torch.constant.device "cpu" %false_6666 = torch.constant.bool false %5709 = torch.aten.arange.start_step %int0_6660, %int128_6661, %int2_6662, %none_6663, %none_6664, %cpu_6665, %false_6666 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, 
!torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_6667 = torch.constant.int 0 %int0_6668 = torch.constant.int 0 %int64_6669 = torch.constant.int 64 %int1_6670 = torch.constant.int 1 %5710 = torch.aten.slice.Tensor %5709, %int0_6667, %int0_6668, %int64_6669, %int1_6670 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_6671 = torch.constant.int 6 %5711 = torch.prims.convert_element_type %5710, %int6_6671 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_6672 = torch.constant.int 128 %5712 = torch.aten.div.Scalar %5711, %int128_6672 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_6673 = torch.constant.float 5.000000e+05 %5713 = torch.aten.pow.Scalar %float5.000000e05_6673, %5712 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5714 = torch.aten.reciprocal %5713 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_6674 = torch.constant.float 1.000000e+00 %5715 = torch.aten.mul.Scalar %5714, %float1.000000e00_6674 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_6675 = torch.constant.int 131072 %int1_6676 = torch.constant.int 1 %5716 = torch.prim.ListConstruct %int131072_6675, %int1_6676 : (!torch.int, !torch.int) -> !torch.list<int> %5717 = torch.aten.view %5708, %5716 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5718 = torch.aten.mul.Tensor %5717, %5715 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5719 = torch.aten.cos %5718 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5720 = torch.aten.sin %5718 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5721 = torch.aten.complex %5719, %5720 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_6677 = torch.constant.int 1 %5722 = torch.aten.size.int %5665, %int1_6677 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_6678 = torch.constant.int 0 %5723 = torch.aten.add.int %int0_6678, %5722 : !torch.int, !torch.int -> !torch.int %int0_6679 = torch.constant.int 0 %int0_6680 = torch.constant.int 0 %int1_6681 = torch.constant.int 1 %5724 = torch.aten.slice.Tensor %5721, %int0_6679, %int0_6680, %5723, %int1_6681 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5724, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_6682 = torch.constant.int 1 %int0_6683 = torch.constant.int 0 %int9223372036854775807_6684 = torch.constant.int 9223372036854775807 %int1_6685 = torch.constant.int 1 %5725 = torch.aten.slice.Tensor %5724, %int1_6682, %int0_6683, %int9223372036854775807_6684, %int1_6685 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5725, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_6686 = torch.constant.int 0 %5726 = torch.aten.unsqueeze %5725, %int0_6686 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5726, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_6687 = torch.constant.int 2 %5727 = torch.aten.unsqueeze %5726, %int2_6687 : 
!torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5727, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_6688 = torch.constant.int 3 %int0_6689 = torch.constant.int 0 %int9223372036854775807_6690 = torch.constant.int 9223372036854775807 %int1_6691 = torch.constant.int 1 %5728 = torch.aten.slice.Tensor %5727, %int3_6688, %int0_6689, %int9223372036854775807_6690, %int1_6691 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5728, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5729 = torch_c.to_builtin_tensor %5676 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_6692 = arith.constant 1 : index %dim_6693 = tensor.dim %5729, %c1_6692 : tensor<4x?x8x128xf16> %5730 = flow.tensor.bitcast %5729 : tensor<4x?x8x128xf16>{%dim_6693} -> tensor<4x?x8x64xcomplex<f16>>{%dim_6693} %5731 = torch_c.from_builtin_tensor %5730 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %5731, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %5732 = torch.aten.mul.Tensor %5731, %5728 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %5732, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %5733 = torch_c.to_builtin_tensor %5732 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_6694 = arith.constant 1 : index %dim_6695 = tensor.dim %5733, %c1_6694 : tensor<4x?x8x64xcomplex<f32>> %5734 = flow.tensor.bitcast %5733 : tensor<4x?x8x64xcomplex<f32>>{%dim_6695} -> tensor<4x?x8x128xf32>{%dim_6695} %5735 = torch_c.from_builtin_tensor %5734 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %5735, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_6696 = torch.constant.int 5 %5736 = torch.prims.convert_element_type %5735, %int5_6696 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5736, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_6697 = torch.constant.int 64 %5737 = torch.aten.mul.Scalar %arg2, %int64_6697 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5737, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int54 = torch.constant.int 54 %int1_6698 = torch.constant.int 1 %5738 = torch.aten.add.Scalar %5737, %int54, %int1_6698 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5738, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_6699 = torch.constant.int 4 %int32_6700 = torch.constant.int 32 %int8_6701 = torch.constant.int 8 %int128_6702 = torch.constant.int 128 %5739 = torch.prim.ListConstruct %int4_6699, %425, %int32_6700, %int8_6701, %int128_6702 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5740 = torch.aten.view %5736, %5739 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5740, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : 
!torch.vtensor<[4,?,32,8,128],f16> %int4_6703 = torch.constant.int 4 %5741 = torch.aten.mul.int %int4_6703, %425 : !torch.int, !torch.int -> !torch.int %int32_6704 = torch.constant.int 32 %int8_6705 = torch.constant.int 8 %int128_6706 = torch.constant.int 128 %5742 = torch.prim.ListConstruct %5741, %int32_6704, %int8_6705, %int128_6706 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5743 = torch.aten.view %5740, %5742 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5743, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_6707 = torch.constant.int 4 %5744 = torch.aten.mul.int %int4_6707, %425 : !torch.int, !torch.int -> !torch.int %5745 = torch.prim.ListConstruct %5744 : (!torch.int) -> !torch.list<int> %5746 = torch.aten.view %5738, %5745 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %5746, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_6708 = torch.constant.int 32 %int2_6709 = torch.constant.int 2 %int32_6710 = torch.constant.int 32 %int8_6711 = torch.constant.int 8 %int128_6712 = torch.constant.int 128 %5747 = torch.prim.ListConstruct %416, %int32_6708, %int2_6709, %int32_6710, %int8_6711, %int128_6712 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5748 = torch.aten.view %5580, %5747 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5748, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_6713 = torch.constant.int 32 %5749 = torch.aten.mul.int %416, %int32_6713 : !torch.int, !torch.int -> !torch.int %int2_6714 = torch.constant.int 2 %5750 = torch.aten.mul.int %5749, %int2_6714 : !torch.int, !torch.int -> !torch.int %int32_6715 = torch.constant.int 32 %int8_6716 = torch.constant.int 8 %int128_6717 = torch.constant.int 128 %5751 = torch.prim.ListConstruct %5750, %int32_6715, %int8_6716, %int128_6717 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5752 = torch.aten.view %5748, %5751 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5752, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %5753 = torch.prim.ListConstruct %5746 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_6718 = torch.constant.bool false %5754 = torch.aten.index_put %5752, %5753, %5743, %false_6718 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5754, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_6719 = torch.constant.int 32 %int2_6720 = torch.constant.int 2 %int32_6721 = torch.constant.int 32 %int8_6722 = torch.constant.int 8 %int128_6723 = torch.constant.int 128 %5755 = torch.prim.ListConstruct %416, %int32_6719, %int2_6720, %int32_6721, %int8_6722, %int128_6723 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5756 = torch.aten.view %5754, %5755 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5756, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> 
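// NOTE (editorial annotation, not part of the generated IR; `//` comments are ignored by the MLIR parser):
// the ops above scatter the rotary-embedded key states (%5743) into the shared paged KV cache with
// torch.aten.index_put, using flattened slot indices of the form page_id * 64 + 54 — 64 appears to be
// 32 transformer blocks x 2 entries (key/value) per block, so this looks like the key slot of one block.
// The ops below flatten the cache back to its [?, 2097152] storage view (2097152 = 32 x 2 x 32 x 8 x 128)
// and repeat the same index_put pattern at slot offset +1 for the corresponding (un-roped) value states.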
%int2097152_6724 = torch.constant.int 2097152 %5757 = torch.prim.ListConstruct %416, %int2097152_6724 : (!torch.int, !torch.int) -> !torch.list<int> %5758 = torch.aten.view %5756, %5757 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5758, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_6725 = torch.constant.int 32 %int2_6726 = torch.constant.int 2 %int32_6727 = torch.constant.int 32 %int8_6728 = torch.constant.int 8 %int128_6729 = torch.constant.int 128 %5759 = torch.prim.ListConstruct %416, %int32_6725, %int2_6726, %int32_6727, %int8_6728, %int128_6729 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5760 = torch.aten.view %5758, %5759 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5760, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_6730 = torch.constant.int 32 %int8_6731 = torch.constant.int 8 %int128_6732 = torch.constant.int 128 %5761 = torch.prim.ListConstruct %5750, %int32_6730, %int8_6731, %int128_6732 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5762 = torch.aten.view %5760, %5761 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5762, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_6733 = torch.constant.int 4 %int32_6734 = torch.constant.int 32 %int8_6735 = torch.constant.int 8 %int128_6736 = torch.constant.int 128 %5763 = torch.prim.ListConstruct %int4_6733, %425, %int32_6734, %int8_6735, %int128_6736 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5764 = torch.aten.view %5678, %5763 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5764, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_6737 = torch.constant.int 4 %5765 = torch.aten.mul.int %int4_6737, %425 : !torch.int, !torch.int -> !torch.int %int32_6738 = torch.constant.int 32 %int8_6739 = torch.constant.int 8 %int128_6740 = torch.constant.int 128 %5766 = torch.prim.ListConstruct %5765, %int32_6738, %int8_6739, %int128_6740 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5767 = torch.aten.view %5764, %5766 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5767, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_6741 = torch.constant.int 1 %int1_6742 = torch.constant.int 1 %5768 = torch.aten.add.Scalar %5738, %int1_6741, %int1_6742 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5768, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_6743 = torch.constant.int 4 %5769 = torch.aten.mul.int %int4_6743, %425 : !torch.int, !torch.int -> !torch.int %5770 = torch.prim.ListConstruct %5769 : (!torch.int) -> !torch.list<int> %5771 = torch.aten.view %5768, %5770 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %5771, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %5772 = torch.prim.ListConstruct %5771 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_6744 = 
torch.constant.bool false %5773 = torch.aten.index_put %5762, %5772, %5767, %false_6744 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5773, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_6745 = torch.constant.int 32 %int2_6746 = torch.constant.int 2 %int32_6747 = torch.constant.int 32 %int8_6748 = torch.constant.int 8 %int128_6749 = torch.constant.int 128 %5774 = torch.prim.ListConstruct %416, %int32_6745, %int2_6746, %int32_6747, %int8_6748, %int128_6749 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5775 = torch.aten.view %5773, %5774 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5775, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_6750 = torch.constant.int 2097152 %5776 = torch.prim.ListConstruct %416, %int2097152_6750 : (!torch.int, !torch.int) -> !torch.list<int> %5777 = torch.aten.view %5775, %5776 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5777, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_6751 = torch.constant.int -2 %5778 = torch.aten.unsqueeze %5736, %int-2_6751 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5778, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_6752 = torch.constant.int 4 %int8_6753 = torch.constant.int 8 %int4_6754 = torch.constant.int 4 %int128_6755 = torch.constant.int 128 %5779 = torch.prim.ListConstruct %int4_6752, %5722, %int8_6753, %int4_6754, %int128_6755 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_6756 = torch.constant.bool false %5780 = torch.aten.expand %5778, %5779, %false_6756 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5780, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_6757 = torch.constant.int 0 %5781 = torch.aten.clone %5780, %int0_6757 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5781, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_6758 = torch.constant.int 4 %int32_6759 = torch.constant.int 32 %int128_6760 = torch.constant.int 128 %5782 = torch.prim.ListConstruct %int4_6758, %5722, %int32_6759, %int128_6760 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5783 = torch.aten._unsafe_view %5781, %5782 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5783, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_6761 = torch.constant.int -2 %5784 = torch.aten.unsqueeze %5678, %int-2_6761 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5784, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_6762 = torch.constant.int 1 %5785 = torch.aten.size.int %5672, %int1_6762 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_6763 = 
torch.constant.int 4 %int8_6764 = torch.constant.int 8 %int4_6765 = torch.constant.int 4 %int128_6766 = torch.constant.int 128 %5786 = torch.prim.ListConstruct %int4_6763, %5785, %int8_6764, %int4_6765, %int128_6766 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_6767 = torch.constant.bool false %5787 = torch.aten.expand %5784, %5786, %false_6767 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5787, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_6768 = torch.constant.int 0 %5788 = torch.aten.clone %5787, %int0_6768 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5788, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_6769 = torch.constant.int 4 %int32_6770 = torch.constant.int 32 %int128_6771 = torch.constant.int 128 %5789 = torch.prim.ListConstruct %int4_6769, %5785, %int32_6770, %int128_6771 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5790 = torch.aten._unsafe_view %5788, %5789 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5790, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_6772 = torch.constant.int 1 %int2_6773 = torch.constant.int 2 %5791 = torch.aten.transpose.int %5707, %int1_6772, %int2_6773 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5791, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6774 = torch.constant.int 1 %int2_6775 = torch.constant.int 2 %5792 = torch.aten.transpose.int %5783, %int1_6774, %int2_6775 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5792, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6776 = torch.constant.int 1 %int2_6777 = torch.constant.int 2 %5793 = torch.aten.transpose.int %5790, %int1_6776, %int2_6777 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5793, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_6778 = torch.constant.float 0.000000e+00 %false_6779 = torch.constant.bool false %none_6780 = torch.constant.none %5794:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5791, %5792, %5793, %float0.000000e00_6778, %false_6779, %320, %none_6780) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %5794#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_6781 = torch.constant.int 1 %int2_6782 = torch.constant.int 2 %5795 = torch.aten.transpose.int %5794#0, %int1_6781, %int2_6782 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5795, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_6783 = torch.constant.int 4 %int4096_6784 = torch.constant.int 4096 %5796 = 
torch.prim.ListConstruct %int4_6783, %5693, %int4096_6784 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5797 = torch.aten.view %5795, %5796 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5797, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6785 = torch.constant.int -2 %int-1_6786 = torch.constant.int -1 %5798 = torch.aten.transpose.int %248, %int-2_6785, %int-1_6786 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_6787 = torch.constant.int 4 %5799 = torch.aten.mul.int %int4_6787, %5693 : !torch.int, !torch.int -> !torch.int %int4096_6788 = torch.constant.int 4096 %5800 = torch.prim.ListConstruct %5799, %int4096_6788 : (!torch.int, !torch.int) -> !torch.list<int> %5801 = torch.aten.view %5797, %5800 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5801, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5802 = torch.aten.mm %5801, %5798 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5802, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6789 = torch.constant.int 4 %int4096_6790 = torch.constant.int 4096 %5803 = torch.prim.ListConstruct %int4_6789, %5693, %int4096_6790 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5804 = torch.aten.view %5802, %5803 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5804, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_6791 = torch.constant.int 1 %5805 = torch.aten.add.Tensor %5642, %5804, %int1_6791 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5805, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_6792 = torch.constant.int 6 %5806 = torch.prims.convert_element_type %5805, %int6_6792 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5806, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_6793 = torch.constant.int 2 %5807 = torch.aten.pow.Tensor_Scalar %5806, %int2_6793 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5807, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_6794 = torch.constant.int -1 %5808 = torch.prim.ListConstruct %int-1_6794 : (!torch.int) -> !torch.list<int> %true_6795 = torch.constant.bool true %none_6796 = torch.constant.none %5809 = torch.aten.mean.dim %5807, %5808, %true_6795, %none_6796 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5809, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_6797 = torch.constant.float 9.9999997473787516E-6 %int1_6798 = torch.constant.int 1 %5810 = torch.aten.add.Scalar %5809, %float9.999990e-06_6797, %int1_6798 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5810, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5811 = torch.aten.rsqrt %5810 : !torch.vtensor<[4,?,1],f32> -> 
!torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5811, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5812 = torch.aten.mul.Tensor %5806, %5811 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5812, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5813 = torch.aten.mul.Tensor %249, %5812 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5813, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_6799 = torch.constant.int 5 %5814 = torch.prims.convert_element_type %5813, %int5_6799 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5814, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6800 = torch.constant.int -2 %int-1_6801 = torch.constant.int -1 %5815 = torch.aten.transpose.int %250, %int-2_6800, %int-1_6801 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_6802 = torch.constant.int 4 %5816 = torch.aten.mul.int %int4_6802, %294 : !torch.int, !torch.int -> !torch.int %int4096_6803 = torch.constant.int 4096 %5817 = torch.prim.ListConstruct %5816, %int4096_6803 : (!torch.int, !torch.int) -> !torch.list<int> %5818 = torch.aten.view %5814, %5817 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5818, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5819 = torch.aten.mm %5818, %5815 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5819, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_6804 = torch.constant.int 4 %int14336_6805 = torch.constant.int 14336 %5820 = torch.prim.ListConstruct %int4_6804, %294, %int14336_6805 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5821 = torch.aten.view %5819, %5820 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5821, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5822 = torch.aten.silu %5821 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5822, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_6806 = torch.constant.int -2 %int-1_6807 = torch.constant.int -1 %5823 = torch.aten.transpose.int %251, %int-2_6806, %int-1_6807 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_6808 = torch.constant.int 4 %5824 = torch.aten.mul.int %int4_6808, %294 : !torch.int, !torch.int -> !torch.int %int4096_6809 = torch.constant.int 4096 %5825 = torch.prim.ListConstruct %5824, %int4096_6809 : (!torch.int, !torch.int) -> !torch.list<int> %5826 = torch.aten.view %5814, %5825 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5826, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5827 = torch.aten.mm %5826, %5823 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5827, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_6810 = torch.constant.int 4 
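// NOTE (editorial annotation, not part of the generated IR; ignored by the MLIR parser): this run of ops
// is the SwiGLU feed-forward block that follows the attention residual add and RMSNorm. silu(x @ ffn_gate^T)
// is computed above (%5822), the parallel up projection x @ ffn_up^T just before this point (%5827); below,
// the two are multiplied elementwise (%5830), projected back through ffn_down, and added to the residual
// stream before the next RMSNorm.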
%int14336_6811 = torch.constant.int 14336 %5828 = torch.prim.ListConstruct %int4_6810, %294, %int14336_6811 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5829 = torch.aten.view %5827, %5828 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5829, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %5830 = torch.aten.mul.Tensor %5822, %5829 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %5830, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_6812 = torch.constant.int -2 %int-1_6813 = torch.constant.int -1 %5831 = torch.aten.transpose.int %252, %int-2_6812, %int-1_6813 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_6814 = torch.constant.int 1 %5832 = torch.aten.size.int %5821, %int1_6814 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_6815 = torch.constant.int 4 %5833 = torch.aten.mul.int %int4_6815, %5832 : !torch.int, !torch.int -> !torch.int %int14336_6816 = torch.constant.int 14336 %5834 = torch.prim.ListConstruct %5833, %int14336_6816 : (!torch.int, !torch.int) -> !torch.list<int> %5835 = torch.aten.view %5830, %5834 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %5835, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %5836 = torch.aten.mm %5835, %5831 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5836, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6817 = torch.constant.int 4 %int4096_6818 = torch.constant.int 4096 %5837 = torch.prim.ListConstruct %int4_6817, %5832, %int4096_6818 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5838 = torch.aten.view %5836, %5837 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5838, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_6819 = torch.constant.int 1 %5839 = torch.aten.add.Tensor %5805, %5838, %int1_6819 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5839, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_6820 = torch.constant.int 6 %5840 = torch.prims.convert_element_type %5839, %int6_6820 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5840, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_6821 = torch.constant.int 2 %5841 = torch.aten.pow.Tensor_Scalar %5840, %int2_6821 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5841, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_6822 = torch.constant.int -1 %5842 = torch.prim.ListConstruct %int-1_6822 : (!torch.int) -> !torch.list<int> %true_6823 = torch.constant.bool true %none_6824 = torch.constant.none %5843 = torch.aten.mean.dim %5841, %5842, %true_6823, %none_6824 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5843, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : 
!torch.vtensor<[4,?,1],f32> %float9.999990e-06_6825 = torch.constant.float 9.9999997473787516E-6 %int1_6826 = torch.constant.int 1 %5844 = torch.aten.add.Scalar %5843, %float9.999990e-06_6825, %int1_6826 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5844, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5845 = torch.aten.rsqrt %5844 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %5845, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %5846 = torch.aten.mul.Tensor %5840, %5845 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5846, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %5847 = torch.aten.mul.Tensor %253, %5846 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %5847, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_6827 = torch.constant.int 5 %5848 = torch.prims.convert_element_type %5847, %int5_6827 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5848, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6828 = torch.constant.int -2 %int-1_6829 = torch.constant.int -1 %5849 = torch.aten.transpose.int %254, %int-2_6828, %int-1_6829 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_6830 = torch.constant.int 4 %5850 = torch.aten.mul.int %int4_6830, %294 : !torch.int, !torch.int -> !torch.int %int4096_6831 = torch.constant.int 4096 %5851 = torch.prim.ListConstruct %5850, %int4096_6831 : (!torch.int, !torch.int) -> !torch.list<int> %5852 = torch.aten.view %5848, %5851 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5852, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5853 = torch.aten.mm %5852, %5849 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5853, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_6832 = torch.constant.int 4 %int4096_6833 = torch.constant.int 4096 %5854 = torch.prim.ListConstruct %int4_6832, %294, %int4096_6833 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5855 = torch.aten.view %5853, %5854 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5855, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_6834 = torch.constant.int -2 %int-1_6835 = torch.constant.int -1 %5856 = torch.aten.transpose.int %255, %int-2_6834, %int-1_6835 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_6836 = torch.constant.int 4 %5857 = torch.aten.mul.int %int4_6836, %294 : !torch.int, !torch.int -> !torch.int %int4096_6837 = torch.constant.int 4096 %5858 = torch.prim.ListConstruct %5857, %int4096_6837 : (!torch.int, !torch.int) -> !torch.list<int> %5859 = torch.aten.view %5848, %5858 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5859, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5860 = torch.aten.mm %5859, %5856 : 
!torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5860, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_6838 = torch.constant.int 4 %int1024_6839 = torch.constant.int 1024 %5861 = torch.prim.ListConstruct %int4_6838, %294, %int1024_6839 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5862 = torch.aten.view %5860, %5861 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %5862, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_6840 = torch.constant.int -2 %int-1_6841 = torch.constant.int -1 %5863 = torch.aten.transpose.int %256, %int-2_6840, %int-1_6841 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_6842 = torch.constant.int 4 %5864 = torch.aten.mul.int %int4_6842, %294 : !torch.int, !torch.int -> !torch.int %int4096_6843 = torch.constant.int 4096 %5865 = torch.prim.ListConstruct %5864, %int4096_6843 : (!torch.int, !torch.int) -> !torch.list<int> %5866 = torch.aten.view %5848, %5865 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5866, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5867 = torch.aten.mm %5866, %5863 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %5867, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_6844 = torch.constant.int 4 %int1024_6845 = torch.constant.int 1024 %5868 = torch.prim.ListConstruct %int4_6844, %294, %int1024_6845 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5869 = torch.aten.view %5867, %5868 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %5869, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_6846 = torch.constant.int 4 %int32_6847 = torch.constant.int 32 %int128_6848 = torch.constant.int 128 %5870 = torch.prim.ListConstruct %int4_6846, %294, %int32_6847, %int128_6848 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5871 = torch.aten.view %5855, %5870 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5871, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_6849 = torch.constant.int 4 %int8_6850 = torch.constant.int 8 %int128_6851 = torch.constant.int 128 %5872 = torch.prim.ListConstruct %int4_6849, %294, %int8_6850, %int128_6851 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5873 = torch.aten.view %5862, %5872 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5873, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_6852 = torch.constant.int 4 %int8_6853 = torch.constant.int 8 %int128_6854 = torch.constant.int 128 %5874 = torch.prim.ListConstruct %int4_6852, %294, %int8_6853, %int128_6854 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5875 = torch.aten.view %5869, %5874 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5875, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_6855 = 
torch.constant.int 131072 %none_6856 = torch.constant.none %none_6857 = torch.constant.none %cpu_6858 = torch.constant.device "cpu" %false_6859 = torch.constant.bool false %5876 = torch.aten.arange %int131072_6855, %none_6856, %none_6857, %cpu_6858, %false_6859 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_6860 = torch.constant.int 0 %int128_6861 = torch.constant.int 128 %int2_6862 = torch.constant.int 2 %none_6863 = torch.constant.none %none_6864 = torch.constant.none %cpu_6865 = torch.constant.device "cpu" %false_6866 = torch.constant.bool false %5877 = torch.aten.arange.start_step %int0_6860, %int128_6861, %int2_6862, %none_6863, %none_6864, %cpu_6865, %false_6866 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_6867 = torch.constant.int 0 %int0_6868 = torch.constant.int 0 %int64_6869 = torch.constant.int 64 %int1_6870 = torch.constant.int 1 %5878 = torch.aten.slice.Tensor %5877, %int0_6867, %int0_6868, %int64_6869, %int1_6870 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_6871 = torch.constant.int 6 %5879 = torch.prims.convert_element_type %5878, %int6_6871 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_6872 = torch.constant.int 128 %5880 = torch.aten.div.Scalar %5879, %int128_6872 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_6873 = torch.constant.float 5.000000e+05 %5881 = torch.aten.pow.Scalar %float5.000000e05_6873, %5880 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5882 = torch.aten.reciprocal %5881 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_6874 = torch.constant.float 1.000000e+00 %5883 = torch.aten.mul.Scalar %5882, %float1.000000e00_6874 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_6875 = torch.constant.int 131072 %int1_6876 = torch.constant.int 1 %5884 = torch.prim.ListConstruct %int131072_6875, %int1_6876 : (!torch.int, !torch.int) -> !torch.list<int> %5885 = torch.aten.view %5876, %5884 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5886 = torch.aten.mul.Tensor %5885, %5883 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5887 = torch.aten.cos %5886 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5888 = torch.aten.sin %5886 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5889 = torch.aten.complex %5887, %5888 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_6877 = torch.constant.int 1 %5890 = torch.aten.size.int %5855, %int1_6877 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_6878 = torch.constant.int 0 %5891 = torch.aten.add.int %int0_6878, %5890 : !torch.int, !torch.int -> !torch.int %int0_6879 = torch.constant.int 0 %int0_6880 = torch.constant.int 0 %int1_6881 = torch.constant.int 1 %5892 = torch.aten.slice.Tensor %5889, %int0_6879, %int0_6880, %5891, %int1_6881 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5892, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_6882 = torch.constant.int 1 %int0_6883 = torch.constant.int 0 %int9223372036854775807_6884 = 
torch.constant.int 9223372036854775807 %int1_6885 = torch.constant.int 1 %5893 = torch.aten.slice.Tensor %5892, %int1_6882, %int0_6883, %int9223372036854775807_6884, %int1_6885 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5893, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_6886 = torch.constant.int 0 %5894 = torch.aten.unsqueeze %5893, %int0_6886 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5894, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_6887 = torch.constant.int 2 %5895 = torch.aten.unsqueeze %5894, %int2_6887 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5895, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_6888 = torch.constant.int 3 %int0_6889 = torch.constant.int 0 %int9223372036854775807_6890 = torch.constant.int 9223372036854775807 %int1_6891 = torch.constant.int 1 %5896 = torch.aten.slice.Tensor %5895, %int3_6888, %int0_6889, %int9223372036854775807_6890, %int1_6891 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5896, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5897 = torch_c.to_builtin_tensor %5871 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_6892 = arith.constant 1 : index %dim_6893 = tensor.dim %5897, %c1_6892 : tensor<4x?x32x128xf16> %5898 = flow.tensor.bitcast %5897 : tensor<4x?x32x128xf16>{%dim_6893} -> tensor<4x?x32x64xcomplex<f16>>{%dim_6893} %5899 = torch_c.from_builtin_tensor %5898 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %5899, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %5900 = torch.aten.mul.Tensor %5899, %5896 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %5900, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %5901 = torch_c.to_builtin_tensor %5900 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_6894 = arith.constant 1 : index %dim_6895 = tensor.dim %5901, %c1_6894 : tensor<4x?x32x64xcomplex<f32>> %5902 = flow.tensor.bitcast %5901 : tensor<4x?x32x64xcomplex<f32>>{%dim_6895} -> tensor<4x?x32x128xf32>{%dim_6895} %5903 = torch_c.from_builtin_tensor %5902 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %5903, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_6896 = torch.constant.int 5 %5904 = torch.prims.convert_element_type %5903, %int5_6896 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5904, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_6897 = torch.constant.int 131072 %none_6898 = torch.constant.none %none_6899 = torch.constant.none %cpu_6900 = torch.constant.device "cpu" %false_6901 = torch.constant.bool false %5905 = torch.aten.arange %int131072_6897, %none_6898, %none_6899, %cpu_6900, %false_6901 : 
!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_6902 = torch.constant.int 0 %int128_6903 = torch.constant.int 128 %int2_6904 = torch.constant.int 2 %none_6905 = torch.constant.none %none_6906 = torch.constant.none %cpu_6907 = torch.constant.device "cpu" %false_6908 = torch.constant.bool false %5906 = torch.aten.arange.start_step %int0_6902, %int128_6903, %int2_6904, %none_6905, %none_6906, %cpu_6907, %false_6908 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_6909 = torch.constant.int 0 %int0_6910 = torch.constant.int 0 %int64_6911 = torch.constant.int 64 %int1_6912 = torch.constant.int 1 %5907 = torch.aten.slice.Tensor %5906, %int0_6909, %int0_6910, %int64_6911, %int1_6912 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_6913 = torch.constant.int 6 %5908 = torch.prims.convert_element_type %5907, %int6_6913 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_6914 = torch.constant.int 128 %5909 = torch.aten.div.Scalar %5908, %int128_6914 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_6915 = torch.constant.float 5.000000e+05 %5910 = torch.aten.pow.Scalar %float5.000000e05_6915, %5909 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %5911 = torch.aten.reciprocal %5910 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_6916 = torch.constant.float 1.000000e+00 %5912 = torch.aten.mul.Scalar %5911, %float1.000000e00_6916 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_6917 = torch.constant.int 131072 %int1_6918 = torch.constant.int 1 %5913 = torch.prim.ListConstruct %int131072_6917, %int1_6918 : (!torch.int, !torch.int) -> !torch.list<int> %5914 = torch.aten.view %5905, %5913 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %5915 = torch.aten.mul.Tensor %5914, %5912 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %5916 = torch.aten.cos %5915 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5917 = torch.aten.sin %5915 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %5918 = torch.aten.complex %5916, %5917 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_6919 = torch.constant.int 1 %5919 = torch.aten.size.int %5862, %int1_6919 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_6920 = torch.constant.int 0 %5920 = torch.aten.add.int %int0_6920, %5919 : !torch.int, !torch.int -> !torch.int %int0_6921 = torch.constant.int 0 %int0_6922 = torch.constant.int 0 %int1_6923 = torch.constant.int 1 %5921 = torch.aten.slice.Tensor %5918, %int0_6921, %int0_6922, %5920, %int1_6923 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5921, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_6924 = torch.constant.int 1 %int0_6925 = torch.constant.int 0 %int9223372036854775807_6926 = torch.constant.int 9223372036854775807 %int1_6927 = torch.constant.int 1 %5922 = torch.aten.slice.Tensor %5921, %int1_6924, %int0_6925, %int9223372036854775807_6926, %int1_6927 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> 
!torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %5922, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_6928 = torch.constant.int 0 %5923 = torch.aten.unsqueeze %5922, %int0_6928 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %5923, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_6929 = torch.constant.int 2 %5924 = torch.aten.unsqueeze %5923, %int2_6929 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5924, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_6930 = torch.constant.int 3 %int0_6931 = torch.constant.int 0 %int9223372036854775807_6932 = torch.constant.int 9223372036854775807 %int1_6933 = torch.constant.int 1 %5925 = torch.aten.slice.Tensor %5924, %int3_6930, %int0_6931, %int9223372036854775807_6932, %int1_6933 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %5925, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %5926 = torch_c.to_builtin_tensor %5873 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_6934 = arith.constant 1 : index %dim_6935 = tensor.dim %5926, %c1_6934 : tensor<4x?x8x128xf16> %5927 = flow.tensor.bitcast %5926 : tensor<4x?x8x128xf16>{%dim_6935} -> tensor<4x?x8x64xcomplex<f16>>{%dim_6935} %5928 = torch_c.from_builtin_tensor %5927 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %5928, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %5929 = torch.aten.mul.Tensor %5928, %5925 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %5929, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %5930 = torch_c.to_builtin_tensor %5929 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_6936 = arith.constant 1 : index %dim_6937 = tensor.dim %5930, %c1_6936 : tensor<4x?x8x64xcomplex<f32>> %5931 = flow.tensor.bitcast %5930 : tensor<4x?x8x64xcomplex<f32>>{%dim_6937} -> tensor<4x?x8x128xf32>{%dim_6937} %5932 = torch_c.from_builtin_tensor %5931 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %5932, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_6938 = torch.constant.int 5 %5933 = torch.prims.convert_element_type %5932, %int5_6938 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %5933, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_6939 = torch.constant.int 64 %5934 = torch.aten.mul.Scalar %arg2, %int64_6939 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5934, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int56 = torch.constant.int 56 %int1_6940 = torch.constant.int 1 %5935 = torch.aten.add.Scalar %5934, %int56, %int1_6940 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5935, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_6941 = 
torch.constant.int 4 %int32_6942 = torch.constant.int 32 %int8_6943 = torch.constant.int 8 %int128_6944 = torch.constant.int 128 %5936 = torch.prim.ListConstruct %int4_6941, %425, %int32_6942, %int8_6943, %int128_6944 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5937 = torch.aten.view %5933, %5936 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5937, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_6945 = torch.constant.int 4 %5938 = torch.aten.mul.int %int4_6945, %425 : !torch.int, !torch.int -> !torch.int %int32_6946 = torch.constant.int 32 %int8_6947 = torch.constant.int 8 %int128_6948 = torch.constant.int 128 %5939 = torch.prim.ListConstruct %5938, %int32_6946, %int8_6947, %int128_6948 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5940 = torch.aten.view %5937, %5939 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5940, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_6949 = torch.constant.int 4 %5941 = torch.aten.mul.int %int4_6949, %425 : !torch.int, !torch.int -> !torch.int %5942 = torch.prim.ListConstruct %5941 : (!torch.int) -> !torch.list<int> %5943 = torch.aten.view %5935, %5942 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %5943, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_6950 = torch.constant.int 32 %int2_6951 = torch.constant.int 2 %int32_6952 = torch.constant.int 32 %int8_6953 = torch.constant.int 8 %int128_6954 = torch.constant.int 128 %5944 = torch.prim.ListConstruct %416, %int32_6950, %int2_6951, %int32_6952, %int8_6953, %int128_6954 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5945 = torch.aten.view %5777, %5944 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5945, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_6955 = torch.constant.int 32 %5946 = torch.aten.mul.int %416, %int32_6955 : !torch.int, !torch.int -> !torch.int %int2_6956 = torch.constant.int 2 %5947 = torch.aten.mul.int %5946, %int2_6956 : !torch.int, !torch.int -> !torch.int %int32_6957 = torch.constant.int 32 %int8_6958 = torch.constant.int 8 %int128_6959 = torch.constant.int 128 %5948 = torch.prim.ListConstruct %5947, %int32_6957, %int8_6958, %int128_6959 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5949 = torch.aten.view %5945, %5948 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5949, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %5950 = torch.prim.ListConstruct %5943 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_6960 = torch.constant.bool false %5951 = torch.aten.index_put %5949, %5950, %5940, %false_6960 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5951, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_6961 = torch.constant.int 32 %int2_6962 = torch.constant.int 2 %int32_6963 = torch.constant.int 32 %int8_6964 = 
torch.constant.int 8 %int128_6965 = torch.constant.int 128 %5952 = torch.prim.ListConstruct %416, %int32_6961, %int2_6962, %int32_6963, %int8_6964, %int128_6965 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5953 = torch.aten.view %5951, %5952 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5953, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_6966 = torch.constant.int 2097152 %5954 = torch.prim.ListConstruct %416, %int2097152_6966 : (!torch.int, !torch.int) -> !torch.list<int> %5955 = torch.aten.view %5953, %5954 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5955, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_6967 = torch.constant.int 32 %int2_6968 = torch.constant.int 2 %int32_6969 = torch.constant.int 32 %int8_6970 = torch.constant.int 8 %int128_6971 = torch.constant.int 128 %5956 = torch.prim.ListConstruct %416, %int32_6967, %int2_6968, %int32_6969, %int8_6970, %int128_6971 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5957 = torch.aten.view %5955, %5956 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5957, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_6972 = torch.constant.int 32 %int8_6973 = torch.constant.int 8 %int128_6974 = torch.constant.int 128 %5958 = torch.prim.ListConstruct %5947, %int32_6972, %int8_6973, %int128_6974 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5959 = torch.aten.view %5957, %5958 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5959, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_6975 = torch.constant.int 4 %int32_6976 = torch.constant.int 32 %int8_6977 = torch.constant.int 8 %int128_6978 = torch.constant.int 128 %5960 = torch.prim.ListConstruct %int4_6975, %425, %int32_6976, %int8_6977, %int128_6978 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5961 = torch.aten.view %5875, %5960 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %5961, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_6979 = torch.constant.int 4 %5962 = torch.aten.mul.int %int4_6979, %425 : !torch.int, !torch.int -> !torch.int %int32_6980 = torch.constant.int 32 %int8_6981 = torch.constant.int 8 %int128_6982 = torch.constant.int 128 %5963 = torch.prim.ListConstruct %5962, %int32_6980, %int8_6981, %int128_6982 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5964 = torch.aten.view %5961, %5963 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5964, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_6983 = torch.constant.int 1 %int1_6984 = torch.constant.int 1 %5965 = torch.aten.add.Scalar %5935, %int1_6983, %int1_6984 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %5965, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_6985 = 
torch.constant.int 4 %5966 = torch.aten.mul.int %int4_6985, %425 : !torch.int, !torch.int -> !torch.int %5967 = torch.prim.ListConstruct %5966 : (!torch.int) -> !torch.list<int> %5968 = torch.aten.view %5965, %5967 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %5968, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %5969 = torch.prim.ListConstruct %5968 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_6986 = torch.constant.bool false %5970 = torch.aten.index_put %5959, %5969, %5964, %false_6986 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %5970, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_6987 = torch.constant.int 32 %int2_6988 = torch.constant.int 2 %int32_6989 = torch.constant.int 32 %int8_6990 = torch.constant.int 8 %int128_6991 = torch.constant.int 128 %5971 = torch.prim.ListConstruct %416, %int32_6987, %int2_6988, %int32_6989, %int8_6990, %int128_6991 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5972 = torch.aten.view %5970, %5971 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %5972, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_6992 = torch.constant.int 2097152 %5973 = torch.prim.ListConstruct %416, %int2097152_6992 : (!torch.int, !torch.int) -> !torch.list<int> %5974 = torch.aten.view %5972, %5973 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %5974, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_6993 = torch.constant.int -2 %5975 = torch.aten.unsqueeze %5933, %int-2_6993 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5975, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_6994 = torch.constant.int 4 %int8_6995 = torch.constant.int 8 %int4_6996 = torch.constant.int 4 %int128_6997 = torch.constant.int 128 %5976 = torch.prim.ListConstruct %int4_6994, %5919, %int8_6995, %int4_6996, %int128_6997 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_6998 = torch.constant.bool false %5977 = torch.aten.expand %5975, %5976, %false_6998 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5977, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_6999 = torch.constant.int 0 %5978 = torch.aten.clone %5977, %int0_6999 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5978, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_7000 = torch.constant.int 4 %int32_7001 = torch.constant.int 32 %int128_7002 = torch.constant.int 128 %5979 = torch.prim.ListConstruct %int4_7000, %5919, %int32_7001, %int128_7002 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5980 = torch.aten._unsafe_view %5978, %5979 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5980, [%292], affine_map<()[s0] -> (4, s0 * 
32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_7003 = torch.constant.int -2 %5981 = torch.aten.unsqueeze %5875, %int-2_7003 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %5981, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_7004 = torch.constant.int 1 %5982 = torch.aten.size.int %5869, %int1_7004 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_7005 = torch.constant.int 4 %int8_7006 = torch.constant.int 8 %int4_7007 = torch.constant.int 4 %int128_7008 = torch.constant.int 128 %5983 = torch.prim.ListConstruct %int4_7005, %5982, %int8_7006, %int4_7007, %int128_7008 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_7009 = torch.constant.bool false %5984 = torch.aten.expand %5981, %5983, %false_7009 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5984, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_7010 = torch.constant.int 0 %5985 = torch.aten.clone %5984, %int0_7010 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %5985, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_7011 = torch.constant.int 4 %int32_7012 = torch.constant.int 32 %int128_7013 = torch.constant.int 128 %5986 = torch.prim.ListConstruct %int4_7011, %5982, %int32_7012, %int128_7013 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %5987 = torch.aten._unsafe_view %5985, %5986 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5987, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_7014 = torch.constant.int 1 %int2_7015 = torch.constant.int 2 %5988 = torch.aten.transpose.int %5904, %int1_7014, %int2_7015 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5988, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7016 = torch.constant.int 1 %int2_7017 = torch.constant.int 2 %5989 = torch.aten.transpose.int %5980, %int1_7016, %int2_7017 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5989, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7018 = torch.constant.int 1 %int2_7019 = torch.constant.int 2 %5990 = torch.aten.transpose.int %5987, %int1_7018, %int2_7019 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %5990, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_7020 = torch.constant.float 0.000000e+00 %false_7021 = torch.constant.bool false %none_7022 = torch.constant.none %5991:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5988, %5989, %5990, %float0.000000e00_7020, %false_7021, %320, %none_7022) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %5991#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 
32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7023 = torch.constant.int 1 %int2_7024 = torch.constant.int 2 %5992 = torch.aten.transpose.int %5991#0, %int1_7023, %int2_7024 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %5992, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_7025 = torch.constant.int 4 %int4096_7026 = torch.constant.int 4096 %5993 = torch.prim.ListConstruct %int4_7025, %5890, %int4096_7026 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %5994 = torch.aten.view %5992, %5993 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %5994, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7027 = torch.constant.int -2 %int-1_7028 = torch.constant.int -1 %5995 = torch.aten.transpose.int %257, %int-2_7027, %int-1_7028 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_7029 = torch.constant.int 4 %5996 = torch.aten.mul.int %int4_7029, %5890 : !torch.int, !torch.int -> !torch.int %int4096_7030 = torch.constant.int 4096 %5997 = torch.prim.ListConstruct %5996, %int4096_7030 : (!torch.int, !torch.int) -> !torch.list<int> %5998 = torch.aten.view %5994, %5997 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5998, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %5999 = torch.aten.mm %5998, %5995 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %5999, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7031 = torch.constant.int 4 %int4096_7032 = torch.constant.int 4096 %6000 = torch.prim.ListConstruct %int4_7031, %5890, %int4096_7032 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6001 = torch.aten.view %5999, %6000 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6001, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_7033 = torch.constant.int 1 %6002 = torch.aten.add.Tensor %5839, %6001, %int1_7033 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6002, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_7034 = torch.constant.int 6 %6003 = torch.prims.convert_element_type %6002, %int6_7034 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6003, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_7035 = torch.constant.int 2 %6004 = torch.aten.pow.Tensor_Scalar %6003, %int2_7035 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6004, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_7036 = torch.constant.int -1 %6005 = torch.prim.ListConstruct %int-1_7036 : (!torch.int) -> !torch.list<int> %true_7037 = torch.constant.bool true %none_7038 = torch.constant.none %6006 = torch.aten.mean.dim %6004, %6005, %true_7037, %none_7038 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6006, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : 
!torch.vtensor<[4,?,1],f32> %float9.999990e-06_7039 = torch.constant.float 9.9999997473787516E-6 %int1_7040 = torch.constant.int 1 %6007 = torch.aten.add.Scalar %6006, %float9.999990e-06_7039, %int1_7040 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6007, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6008 = torch.aten.rsqrt %6007 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6008, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6009 = torch.aten.mul.Tensor %6003, %6008 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6009, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %6010 = torch.aten.mul.Tensor %258, %6009 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6010, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_7041 = torch.constant.int 5 %6011 = torch.prims.convert_element_type %6010, %int5_7041 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6011, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7042 = torch.constant.int -2 %int-1_7043 = torch.constant.int -1 %6012 = torch.aten.transpose.int %259, %int-2_7042, %int-1_7043 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_7044 = torch.constant.int 4 %6013 = torch.aten.mul.int %int4_7044, %294 : !torch.int, !torch.int -> !torch.int %int4096_7045 = torch.constant.int 4096 %6014 = torch.prim.ListConstruct %6013, %int4096_7045 : (!torch.int, !torch.int) -> !torch.list<int> %6015 = torch.aten.view %6011, %6014 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6015, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6016 = torch.aten.mm %6015, %6012 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6016, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_7046 = torch.constant.int 4 %int14336_7047 = torch.constant.int 14336 %6017 = torch.prim.ListConstruct %int4_7046, %294, %int14336_7047 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6018 = torch.aten.view %6016, %6017 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6018, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %6019 = torch.aten.silu %6018 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6019, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_7048 = torch.constant.int -2 %int-1_7049 = torch.constant.int -1 %6020 = torch.aten.transpose.int %260, %int-2_7048, %int-1_7049 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_7050 = torch.constant.int 4 %6021 = torch.aten.mul.int %int4_7050, %294 : !torch.int, !torch.int -> !torch.int %int4096_7051 = torch.constant.int 4096 %6022 = torch.prim.ListConstruct %6021, %int4096_7051 : (!torch.int, !torch.int) -> !torch.list<int> %6023 = torch.aten.view %6011, %6022 : 
!torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6023, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6024 = torch.aten.mm %6023, %6020 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6024, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_7052 = torch.constant.int 4 %int14336_7053 = torch.constant.int 14336 %6025 = torch.prim.ListConstruct %int4_7052, %294, %int14336_7053 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6026 = torch.aten.view %6024, %6025 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6026, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %6027 = torch.aten.mul.Tensor %6019, %6026 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6027, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_7054 = torch.constant.int -2 %int-1_7055 = torch.constant.int -1 %6028 = torch.aten.transpose.int %261, %int-2_7054, %int-1_7055 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_7056 = torch.constant.int 1 %6029 = torch.aten.size.int %6018, %int1_7056 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_7057 = torch.constant.int 4 %6030 = torch.aten.mul.int %int4_7057, %6029 : !torch.int, !torch.int -> !torch.int %int14336_7058 = torch.constant.int 14336 %6031 = torch.prim.ListConstruct %6030, %int14336_7058 : (!torch.int, !torch.int) -> !torch.list<int> %6032 = torch.aten.view %6027, %6031 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6032, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %6033 = torch.aten.mm %6032, %6028 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6033, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7059 = torch.constant.int 4 %int4096_7060 = torch.constant.int 4096 %6034 = torch.prim.ListConstruct %int4_7059, %6029, %int4096_7060 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6035 = torch.aten.view %6033, %6034 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6035, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_7061 = torch.constant.int 1 %6036 = torch.aten.add.Tensor %6002, %6035, %int1_7061 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6036, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_7062 = torch.constant.int 6 %6037 = torch.prims.convert_element_type %6036, %int6_7062 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6037, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_7063 = torch.constant.int 2 %6038 = torch.aten.pow.Tensor_Scalar %6037, %int2_7063 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6038, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : 
!torch.vtensor<[4,?,4096],f32> %int-1_7064 = torch.constant.int -1 %6039 = torch.prim.ListConstruct %int-1_7064 : (!torch.int) -> !torch.list<int> %true_7065 = torch.constant.bool true %none_7066 = torch.constant.none %6040 = torch.aten.mean.dim %6038, %6039, %true_7065, %none_7066 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6040, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_7067 = torch.constant.float 9.9999997473787516E-6 %int1_7068 = torch.constant.int 1 %6041 = torch.aten.add.Scalar %6040, %float9.999990e-06_7067, %int1_7068 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6041, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6042 = torch.aten.rsqrt %6041 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6042, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6043 = torch.aten.mul.Tensor %6037, %6042 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6043, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %6044 = torch.aten.mul.Tensor %262, %6043 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6044, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_7069 = torch.constant.int 5 %6045 = torch.prims.convert_element_type %6044, %int5_7069 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6045, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7070 = torch.constant.int -2 %int-1_7071 = torch.constant.int -1 %6046 = torch.aten.transpose.int %263, %int-2_7070, %int-1_7071 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_7072 = torch.constant.int 4 %6047 = torch.aten.mul.int %int4_7072, %294 : !torch.int, !torch.int -> !torch.int %int4096_7073 = torch.constant.int 4096 %6048 = torch.prim.ListConstruct %6047, %int4096_7073 : (!torch.int, !torch.int) -> !torch.list<int> %6049 = torch.aten.view %6045, %6048 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6049, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6050 = torch.aten.mm %6049, %6046 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6050, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7074 = torch.constant.int 4 %int4096_7075 = torch.constant.int 4096 %6051 = torch.prim.ListConstruct %int4_7074, %294, %int4096_7075 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6052 = torch.aten.view %6050, %6051 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6052, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7076 = torch.constant.int -2 %int-1_7077 = torch.constant.int -1 %6053 = torch.aten.transpose.int %264, %int-2_7076, %int-1_7077 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_7078 = torch.constant.int 4 %6054 = torch.aten.mul.int 
%int4_7078, %294 : !torch.int, !torch.int -> !torch.int %int4096_7079 = torch.constant.int 4096 %6055 = torch.prim.ListConstruct %6054, %int4096_7079 : (!torch.int, !torch.int) -> !torch.list<int> %6056 = torch.aten.view %6045, %6055 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6056, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6057 = torch.aten.mm %6056, %6053 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %6057, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_7080 = torch.constant.int 4 %int1024_7081 = torch.constant.int 1024 %6058 = torch.prim.ListConstruct %int4_7080, %294, %int1024_7081 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6059 = torch.aten.view %6057, %6058 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %6059, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_7082 = torch.constant.int -2 %int-1_7083 = torch.constant.int -1 %6060 = torch.aten.transpose.int %265, %int-2_7082, %int-1_7083 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_7084 = torch.constant.int 4 %6061 = torch.aten.mul.int %int4_7084, %294 : !torch.int, !torch.int -> !torch.int %int4096_7085 = torch.constant.int 4096 %6062 = torch.prim.ListConstruct %6061, %int4096_7085 : (!torch.int, !torch.int) -> !torch.list<int> %6063 = torch.aten.view %6045, %6062 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6063, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6064 = torch.aten.mm %6063, %6060 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %6064, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_7086 = torch.constant.int 4 %int1024_7087 = torch.constant.int 1024 %6065 = torch.prim.ListConstruct %int4_7086, %294, %int1024_7087 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6066 = torch.aten.view %6064, %6065 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %6066, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_7088 = torch.constant.int 4 %int32_7089 = torch.constant.int 32 %int128_7090 = torch.constant.int 128 %6067 = torch.prim.ListConstruct %int4_7088, %294, %int32_7089, %int128_7090 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6068 = torch.aten.view %6052, %6067 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6068, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_7091 = torch.constant.int 4 %int8_7092 = torch.constant.int 8 %int128_7093 = torch.constant.int 128 %6069 = torch.prim.ListConstruct %int4_7091, %294, %int8_7092, %int128_7093 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6070 = torch.aten.view %6059, %6069 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %6070, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_7094 = torch.constant.int 4 %int8_7095 = 
torch.constant.int 8 %int128_7096 = torch.constant.int 128 %6071 = torch.prim.ListConstruct %int4_7094, %294, %int8_7095, %int128_7096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6072 = torch.aten.view %6066, %6071 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %6072, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_7097 = torch.constant.int 131072 %none_7098 = torch.constant.none %none_7099 = torch.constant.none %cpu_7100 = torch.constant.device "cpu" %false_7101 = torch.constant.bool false %6073 = torch.aten.arange %int131072_7097, %none_7098, %none_7099, %cpu_7100, %false_7101 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_7102 = torch.constant.int 0 %int128_7103 = torch.constant.int 128 %int2_7104 = torch.constant.int 2 %none_7105 = torch.constant.none %none_7106 = torch.constant.none %cpu_7107 = torch.constant.device "cpu" %false_7108 = torch.constant.bool false %6074 = torch.aten.arange.start_step %int0_7102, %int128_7103, %int2_7104, %none_7105, %none_7106, %cpu_7107, %false_7108 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_7109 = torch.constant.int 0 %int0_7110 = torch.constant.int 0 %int64_7111 = torch.constant.int 64 %int1_7112 = torch.constant.int 1 %6075 = torch.aten.slice.Tensor %6074, %int0_7109, %int0_7110, %int64_7111, %int1_7112 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_7113 = torch.constant.int 6 %6076 = torch.prims.convert_element_type %6075, %int6_7113 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_7114 = torch.constant.int 128 %6077 = torch.aten.div.Scalar %6076, %int128_7114 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_7115 = torch.constant.float 5.000000e+05 %6078 = torch.aten.pow.Scalar %float5.000000e05_7115, %6077 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %6079 = torch.aten.reciprocal %6078 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_7116 = torch.constant.float 1.000000e+00 %6080 = torch.aten.mul.Scalar %6079, %float1.000000e00_7116 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_7117 = torch.constant.int 131072 %int1_7118 = torch.constant.int 1 %6081 = torch.prim.ListConstruct %int131072_7117, %int1_7118 : (!torch.int, !torch.int) -> !torch.list<int> %6082 = torch.aten.view %6073, %6081 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %6083 = torch.aten.mul.Tensor %6082, %6080 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %6084 = torch.aten.cos %6083 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6085 = torch.aten.sin %6083 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6086 = torch.aten.complex %6084, %6085 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_7119 = torch.constant.int 1 %6087 = torch.aten.size.int %6052, %int1_7119 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_7120 = torch.constant.int 0 %6088 = torch.aten.add.int %int0_7120, %6087 : !torch.int, !torch.int -> !torch.int %int0_7121 = torch.constant.int 0 %int0_7122 = torch.constant.int 0 
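// The ops above rebuild the rotary-embedding (RoPE) table inline: arange(0, 128, 2) / 128 feeds
// pow/reciprocal to give inv_freq_j = 500000^(-2j/128) for j = 0..63 (the mul.Scalar by 1.0 is an
// identity scale), positions 0..131071 are outer-multiplied against it, and cos/sin are packed into
// a [131072, 64] complex<f32> table (%6086). The ops below slice the first seq_len rows of that
// table and apply it to this block's 32 query heads (%6068) by bitcasting adjacent f16 pairs to
// complex<f16>. A hedged PyTorch-level sketch of the same computation (names are illustrative,
// not taken from the source):
//   inv_freq = 1.0 / (500000.0 ** (torch.arange(0, 128, 2).float() / 128))   # [64]
//   angles   = torch.arange(131072).unsqueeze(1) * inv_freq                  # [131072, 64]
//   rope     = torch.polar(torch.ones_like(angles), angles)                  # cos + i*sin
//   q_c      = torch.view_as_complex(q.float().reshape(4, seq, 32, 64, 2))
//   q_rot    = torch.view_as_real(q_c * rope[:seq, None, :]).flatten(-2).to(q.dtype)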
%int1_7123 = torch.constant.int 1 %6089 = torch.aten.slice.Tensor %6086, %int0_7121, %int0_7122, %6088, %int1_7123 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6089, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_7124 = torch.constant.int 1 %int0_7125 = torch.constant.int 0 %int9223372036854775807_7126 = torch.constant.int 9223372036854775807 %int1_7127 = torch.constant.int 1 %6090 = torch.aten.slice.Tensor %6089, %int1_7124, %int0_7125, %int9223372036854775807_7126, %int1_7127 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6090, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_7128 = torch.constant.int 0 %6091 = torch.aten.unsqueeze %6090, %int0_7128 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %6091, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_7129 = torch.constant.int 2 %6092 = torch.aten.unsqueeze %6091, %int2_7129 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6092, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_7130 = torch.constant.int 3 %int0_7131 = torch.constant.int 0 %int9223372036854775807_7132 = torch.constant.int 9223372036854775807 %int1_7133 = torch.constant.int 1 %6093 = torch.aten.slice.Tensor %6092, %int3_7130, %int0_7131, %int9223372036854775807_7132, %int1_7133 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6093, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %6094 = torch_c.to_builtin_tensor %6068 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_7134 = arith.constant 1 : index %dim_7135 = tensor.dim %6094, %c1_7134 : tensor<4x?x32x128xf16> %6095 = flow.tensor.bitcast %6094 : tensor<4x?x32x128xf16>{%dim_7135} -> tensor<4x?x32x64xcomplex<f16>>{%dim_7135} %6096 = torch_c.from_builtin_tensor %6095 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %6096, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %6097 = torch.aten.mul.Tensor %6096, %6093 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %6097, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %6098 = torch_c.to_builtin_tensor %6097 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_7136 = arith.constant 1 : index %dim_7137 = tensor.dim %6098, %c1_7136 : tensor<4x?x32x64xcomplex<f32>> %6099 = flow.tensor.bitcast %6098 : tensor<4x?x32x64xcomplex<f32>>{%dim_7137} -> tensor<4x?x32x128xf32>{%dim_7137} %6100 = torch_c.from_builtin_tensor %6099 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %6100, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_7138 = torch.constant.int 5 %6101 = torch.prims.convert_element_type %6100, %int5_7138 : !torch.vtensor<[4,?,32,128],f32>, 
!torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6101, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_7139 = torch.constant.int 131072 %none_7140 = torch.constant.none %none_7141 = torch.constant.none %cpu_7142 = torch.constant.device "cpu" %false_7143 = torch.constant.bool false %6102 = torch.aten.arange %int131072_7139, %none_7140, %none_7141, %cpu_7142, %false_7143 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_7144 = torch.constant.int 0 %int128_7145 = torch.constant.int 128 %int2_7146 = torch.constant.int 2 %none_7147 = torch.constant.none %none_7148 = torch.constant.none %cpu_7149 = torch.constant.device "cpu" %false_7150 = torch.constant.bool false %6103 = torch.aten.arange.start_step %int0_7144, %int128_7145, %int2_7146, %none_7147, %none_7148, %cpu_7149, %false_7150 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_7151 = torch.constant.int 0 %int0_7152 = torch.constant.int 0 %int64_7153 = torch.constant.int 64 %int1_7154 = torch.constant.int 1 %6104 = torch.aten.slice.Tensor %6103, %int0_7151, %int0_7152, %int64_7153, %int1_7154 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_7155 = torch.constant.int 6 %6105 = torch.prims.convert_element_type %6104, %int6_7155 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_7156 = torch.constant.int 128 %6106 = torch.aten.div.Scalar %6105, %int128_7156 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_7157 = torch.constant.float 5.000000e+05 %6107 = torch.aten.pow.Scalar %float5.000000e05_7157, %6106 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %6108 = torch.aten.reciprocal %6107 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_7158 = torch.constant.float 1.000000e+00 %6109 = torch.aten.mul.Scalar %6108, %float1.000000e00_7158 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_7159 = torch.constant.int 131072 %int1_7160 = torch.constant.int 1 %6110 = torch.prim.ListConstruct %int131072_7159, %int1_7160 : (!torch.int, !torch.int) -> !torch.list<int> %6111 = torch.aten.view %6102, %6110 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %6112 = torch.aten.mul.Tensor %6111, %6109 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %6113 = torch.aten.cos %6112 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6114 = torch.aten.sin %6112 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6115 = torch.aten.complex %6113, %6114 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_7161 = torch.constant.int 1 %6116 = torch.aten.size.int %6059, %int1_7161 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_7162 = torch.constant.int 0 %6117 = torch.aten.add.int %int0_7162, %6116 : !torch.int, !torch.int -> !torch.int %int0_7163 = torch.constant.int 0 %int0_7164 = torch.constant.int 0 %int1_7165 = torch.constant.int 1 %6118 = torch.aten.slice.Tensor %6115, %int0_7163, %int0_7164, %6117, %int1_7165 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6118, 
[%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_7166 = torch.constant.int 1 %int0_7167 = torch.constant.int 0 %int9223372036854775807_7168 = torch.constant.int 9223372036854775807 %int1_7169 = torch.constant.int 1 %6119 = torch.aten.slice.Tensor %6118, %int1_7166, %int0_7167, %int9223372036854775807_7168, %int1_7169 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6119, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_7170 = torch.constant.int 0 %6120 = torch.aten.unsqueeze %6119, %int0_7170 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %6120, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_7171 = torch.constant.int 2 %6121 = torch.aten.unsqueeze %6120, %int2_7171 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6121, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_7172 = torch.constant.int 3 %int0_7173 = torch.constant.int 0 %int9223372036854775807_7174 = torch.constant.int 9223372036854775807 %int1_7175 = torch.constant.int 1 %6122 = torch.aten.slice.Tensor %6121, %int3_7172, %int0_7173, %int9223372036854775807_7174, %int1_7175 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6122, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %6123 = torch_c.to_builtin_tensor %6070 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_7176 = arith.constant 1 : index %dim_7177 = tensor.dim %6123, %c1_7176 : tensor<4x?x8x128xf16> %6124 = flow.tensor.bitcast %6123 : tensor<4x?x8x128xf16>{%dim_7177} -> tensor<4x?x8x64xcomplex<f16>>{%dim_7177} %6125 = torch_c.from_builtin_tensor %6124 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %6125, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %6126 = torch.aten.mul.Tensor %6125, %6122 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %6126, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %6127 = torch_c.to_builtin_tensor %6126 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_7178 = arith.constant 1 : index %dim_7179 = tensor.dim %6127, %c1_7178 : tensor<4x?x8x64xcomplex<f32>> %6128 = flow.tensor.bitcast %6127 : tensor<4x?x8x64xcomplex<f32>>{%dim_7179} -> tensor<4x?x8x128xf32>{%dim_7179} %6129 = torch_c.from_builtin_tensor %6128 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %6129, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_7180 = torch.constant.int 5 %6130 = torch.prims.convert_element_type %6129, %int5_7180 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %6130, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_7181 = torch.constant.int 64 %6131 = torch.aten.mul.Scalar %arg2, %int64_7181 : !torch.vtensor<[4,?],si64>, !torch.int -> 
!torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %6131, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int58 = torch.constant.int 58 %int1_7182 = torch.constant.int 1 %6132 = torch.aten.add.Scalar %6131, %int58, %int1_7182 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %6132, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_7183 = torch.constant.int 4 %int32_7184 = torch.constant.int 32 %int8_7185 = torch.constant.int 8 %int128_7186 = torch.constant.int 128 %6133 = torch.prim.ListConstruct %int4_7183, %425, %int32_7184, %int8_7185, %int128_7186 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6134 = torch.aten.view %6130, %6133 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %6134, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_7187 = torch.constant.int 4 %6135 = torch.aten.mul.int %int4_7187, %425 : !torch.int, !torch.int -> !torch.int %int32_7188 = torch.constant.int 32 %int8_7189 = torch.constant.int 8 %int128_7190 = torch.constant.int 128 %6136 = torch.prim.ListConstruct %6135, %int32_7188, %int8_7189, %int128_7190 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6137 = torch.aten.view %6134, %6136 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6137, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_7191 = torch.constant.int 4 %6138 = torch.aten.mul.int %int4_7191, %425 : !torch.int, !torch.int -> !torch.int %6139 = torch.prim.ListConstruct %6138 : (!torch.int) -> !torch.list<int> %6140 = torch.aten.view %6132, %6139 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %6140, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_7192 = torch.constant.int 32 %int2_7193 = torch.constant.int 2 %int32_7194 = torch.constant.int 32 %int8_7195 = torch.constant.int 8 %int128_7196 = torch.constant.int 128 %6141 = torch.prim.ListConstruct %416, %int32_7192, %int2_7193, %int32_7194, %int8_7195, %int128_7196 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6142 = torch.aten.view %5974, %6141 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6142, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_7197 = torch.constant.int 32 %6143 = torch.aten.mul.int %416, %int32_7197 : !torch.int, !torch.int -> !torch.int %int2_7198 = torch.constant.int 2 %6144 = torch.aten.mul.int %6143, %int2_7198 : !torch.int, !torch.int -> !torch.int %int32_7199 = torch.constant.int 32 %int8_7200 = torch.constant.int 8 %int128_7201 = torch.constant.int 128 %6145 = torch.prim.ListConstruct %6144, %int32_7199, %int8_7200, %int128_7201 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6146 = torch.aten.view %6142, %6145 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6146, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %6147 = torch.prim.ListConstruct %6140 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_7202 = torch.constant.bool false %6148 = 
torch.aten.index_put %6146, %6147, %6137, %false_7202 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6148, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_7203 = torch.constant.int 32 %int2_7204 = torch.constant.int 2 %int32_7205 = torch.constant.int 32 %int8_7206 = torch.constant.int 8 %int128_7207 = torch.constant.int 128 %6149 = torch.prim.ListConstruct %416, %int32_7203, %int2_7204, %int32_7205, %int8_7206, %int128_7207 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6150 = torch.aten.view %6148, %6149 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6150, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_7208 = torch.constant.int 2097152 %6151 = torch.prim.ListConstruct %416, %int2097152_7208 : (!torch.int, !torch.int) -> !torch.list<int> %6152 = torch.aten.view %6150, %6151 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %6152, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_7209 = torch.constant.int 32 %int2_7210 = torch.constant.int 2 %int32_7211 = torch.constant.int 32 %int8_7212 = torch.constant.int 8 %int128_7213 = torch.constant.int 128 %6153 = torch.prim.ListConstruct %416, %int32_7209, %int2_7210, %int32_7211, %int8_7212, %int128_7213 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6154 = torch.aten.view %6152, %6153 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6154, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_7214 = torch.constant.int 32 %int8_7215 = torch.constant.int 8 %int128_7216 = torch.constant.int 128 %6155 = torch.prim.ListConstruct %6144, %int32_7214, %int8_7215, %int128_7216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6156 = torch.aten.view %6154, %6155 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6156, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_7217 = torch.constant.int 4 %int32_7218 = torch.constant.int 32 %int8_7219 = torch.constant.int 8 %int128_7220 = torch.constant.int 128 %6157 = torch.prim.ListConstruct %int4_7217, %425, %int32_7218, %int8_7219, %int128_7220 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6158 = torch.aten.view %6072, %6157 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %6158, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_7221 = torch.constant.int 4 %6159 = torch.aten.mul.int %int4_7221, %425 : !torch.int, !torch.int -> !torch.int %int32_7222 = torch.constant.int 32 %int8_7223 = torch.constant.int 8 %int128_7224 = torch.constant.int 128 %6160 = torch.prim.ListConstruct %6159, %int32_7222, %int8_7223, %int128_7224 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6161 = torch.aten.view %6158, %6160 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> 
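// Above: the rotated keys are scattered back into the shared paged KV cache. The cache is carried
// as a flat [?, 2097152] f16 tensor (2097152 = 32 * 2 * 32 * 8 * 128), viewed as
// [page_count, 32, 2, 32, 8, 128]; flattening the leading three dims gives rows of shape
// [32, 8, 128], and the row indices written are page_id * 64 + 58. Since each page holds
// 32 blocks x 2 (K/V) = 64 sub-pages, 58 = 2 * 29 reads as the K sub-page of decoder block 29
// (an inference from the offsets, not stated in the IR). Below, the values take the adjacent
// sub-page at offset 59 (%6162 = %6132 + 1). A hedged sketch of the indexing (illustrative names):
//   rows   = cache.view(-1, 32, 8, 128)          # [page_count * 64, 32, 8, 128]
//   slot_k = page_ids * 64 + 2 * block_idx       # here 2 * 29 = 58
//   rows[slot_k]     = k_rot.reshape(-1, 32, 8, 128)   # the torch.aten.index_put above
//   rows[slot_k + 1] = v.reshape(-1, 32, 8, 128)       # the index_put that follows below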
torch.bind_symbolic_shape %6161, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_7225 = torch.constant.int 1 %int1_7226 = torch.constant.int 1 %6162 = torch.aten.add.Scalar %6132, %int1_7225, %int1_7226 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %6162, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_7227 = torch.constant.int 4 %6163 = torch.aten.mul.int %int4_7227, %425 : !torch.int, !torch.int -> !torch.int %6164 = torch.prim.ListConstruct %6163 : (!torch.int) -> !torch.list<int> %6165 = torch.aten.view %6162, %6164 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %6165, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %6166 = torch.prim.ListConstruct %6165 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_7228 = torch.constant.bool false %6167 = torch.aten.index_put %6156, %6166, %6161, %false_7228 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6167, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_7229 = torch.constant.int 32 %int2_7230 = torch.constant.int 2 %int32_7231 = torch.constant.int 32 %int8_7232 = torch.constant.int 8 %int128_7233 = torch.constant.int 128 %6168 = torch.prim.ListConstruct %416, %int32_7229, %int2_7230, %int32_7231, %int8_7232, %int128_7233 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6169 = torch.aten.view %6167, %6168 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6169, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_7234 = torch.constant.int 2097152 %6170 = torch.prim.ListConstruct %416, %int2097152_7234 : (!torch.int, !torch.int) -> !torch.list<int> %6171 = torch.aten.view %6169, %6170 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %6171, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_7235 = torch.constant.int -2 %6172 = torch.aten.unsqueeze %6130, %int-2_7235 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %6172, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_7236 = torch.constant.int 4 %int8_7237 = torch.constant.int 8 %int4_7238 = torch.constant.int 4 %int128_7239 = torch.constant.int 128 %6173 = torch.prim.ListConstruct %int4_7236, %6116, %int8_7237, %int4_7238, %int128_7239 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_7240 = torch.constant.bool false %6174 = torch.aten.expand %6172, %6173, %false_7240 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6174, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_7241 = torch.constant.int 0 %6175 = torch.aten.clone %6174, %int0_7241 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6175, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_7242 = 
torch.constant.int 4 %int32_7243 = torch.constant.int 32 %int128_7244 = torch.constant.int 128 %6176 = torch.prim.ListConstruct %int4_7242, %6116, %int32_7243, %int128_7244 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6177 = torch.aten._unsafe_view %6175, %6176 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6177, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_7245 = torch.constant.int -2 %6178 = torch.aten.unsqueeze %6072, %int-2_7245 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %6178, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_7246 = torch.constant.int 1 %6179 = torch.aten.size.int %6066, %int1_7246 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_7247 = torch.constant.int 4 %int8_7248 = torch.constant.int 8 %int4_7249 = torch.constant.int 4 %int128_7250 = torch.constant.int 128 %6180 = torch.prim.ListConstruct %int4_7247, %6179, %int8_7248, %int4_7249, %int128_7250 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_7251 = torch.constant.bool false %6181 = torch.aten.expand %6178, %6180, %false_7251 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6181, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_7252 = torch.constant.int 0 %6182 = torch.aten.clone %6181, %int0_7252 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6182, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_7253 = torch.constant.int 4 %int32_7254 = torch.constant.int 32 %int128_7255 = torch.constant.int 128 %6183 = torch.prim.ListConstruct %int4_7253, %6179, %int32_7254, %int128_7255 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6184 = torch.aten._unsafe_view %6182, %6183 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6184, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_7256 = torch.constant.int 1 %int2_7257 = torch.constant.int 2 %6185 = torch.aten.transpose.int %6101, %int1_7256, %int2_7257 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %6185, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7258 = torch.constant.int 1 %int2_7259 = torch.constant.int 2 %6186 = torch.aten.transpose.int %6177, %int1_7258, %int2_7259 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %6186, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7260 = torch.constant.int 1 %int2_7261 = torch.constant.int 2 %6187 = torch.aten.transpose.int %6184, %int1_7260, %int2_7261 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %6187, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_7262 = torch.constant.float 0.000000e+00 %false_7263 = torch.constant.bool false %none_7264 = torch.constant.none %6188:2 = torch.operator 
"torch.aten._scaled_dot_product_flash_attention_for_cpu"(%6185, %6186, %6187, %float0.000000e00_7262, %false_7263, %320, %none_7264) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %6188#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7265 = torch.constant.int 1 %int2_7266 = torch.constant.int 2 %6189 = torch.aten.transpose.int %6188#0, %int1_7265, %int2_7266 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6189, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_7267 = torch.constant.int 4 %int4096_7268 = torch.constant.int 4096 %6190 = torch.prim.ListConstruct %int4_7267, %6087, %int4096_7268 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6191 = torch.aten.view %6189, %6190 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6191, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7269 = torch.constant.int -2 %int-1_7270 = torch.constant.int -1 %6192 = torch.aten.transpose.int %266, %int-2_7269, %int-1_7270 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_7271 = torch.constant.int 4 %6193 = torch.aten.mul.int %int4_7271, %6087 : !torch.int, !torch.int -> !torch.int %int4096_7272 = torch.constant.int 4096 %6194 = torch.prim.ListConstruct %6193, %int4096_7272 : (!torch.int, !torch.int) -> !torch.list<int> %6195 = torch.aten.view %6191, %6194 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6195, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6196 = torch.aten.mm %6195, %6192 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6196, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7273 = torch.constant.int 4 %int4096_7274 = torch.constant.int 4096 %6197 = torch.prim.ListConstruct %int4_7273, %6087, %int4096_7274 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6198 = torch.aten.view %6196, %6197 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6198, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_7275 = torch.constant.int 1 %6199 = torch.aten.add.Tensor %6036, %6198, %int1_7275 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6199, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_7276 = torch.constant.int 6 %6200 = torch.prims.convert_element_type %6199, %int6_7276 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6200, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_7277 = torch.constant.int 2 %6201 = torch.aten.pow.Tensor_Scalar %6200, %int2_7277 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6201, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : 
!torch.vtensor<[4,?,4096],f32> %int-1_7278 = torch.constant.int -1 %6202 = torch.prim.ListConstruct %int-1_7278 : (!torch.int) -> !torch.list<int> %true_7279 = torch.constant.bool true %none_7280 = torch.constant.none %6203 = torch.aten.mean.dim %6201, %6202, %true_7279, %none_7280 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6203, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_7281 = torch.constant.float 9.9999997473787516E-6 %int1_7282 = torch.constant.int 1 %6204 = torch.aten.add.Scalar %6203, %float9.999990e-06_7281, %int1_7282 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6204, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6205 = torch.aten.rsqrt %6204 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6205, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6206 = torch.aten.mul.Tensor %6200, %6205 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6206, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %6207 = torch.aten.mul.Tensor %267, %6206 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6207, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_7283 = torch.constant.int 5 %6208 = torch.prims.convert_element_type %6207, %int5_7283 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6208, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7284 = torch.constant.int -2 %int-1_7285 = torch.constant.int -1 %6209 = torch.aten.transpose.int %268, %int-2_7284, %int-1_7285 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_7286 = torch.constant.int 4 %6210 = torch.aten.mul.int %int4_7286, %294 : !torch.int, !torch.int -> !torch.int %int4096_7287 = torch.constant.int 4096 %6211 = torch.prim.ListConstruct %6210, %int4096_7287 : (!torch.int, !torch.int) -> !torch.list<int> %6212 = torch.aten.view %6208, %6211 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6212, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6213 = torch.aten.mm %6212, %6209 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6213, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_7288 = torch.constant.int 4 %int14336_7289 = torch.constant.int 14336 %6214 = torch.prim.ListConstruct %int4_7288, %294, %int14336_7289 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6215 = torch.aten.view %6213, %6214 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6215, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %6216 = torch.aten.silu %6215 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6216, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_7290 = torch.constant.int -2 %int-1_7291 = 
torch.constant.int -1 %6217 = torch.aten.transpose.int %269, %int-2_7290, %int-1_7291 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_7292 = torch.constant.int 4 %6218 = torch.aten.mul.int %int4_7292, %294 : !torch.int, !torch.int -> !torch.int %int4096_7293 = torch.constant.int 4096 %6219 = torch.prim.ListConstruct %6218, %int4096_7293 : (!torch.int, !torch.int) -> !torch.list<int> %6220 = torch.aten.view %6208, %6219 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6220, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6221 = torch.aten.mm %6220, %6217 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6221, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_7294 = torch.constant.int 4 %int14336_7295 = torch.constant.int 14336 %6222 = torch.prim.ListConstruct %int4_7294, %294, %int14336_7295 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6223 = torch.aten.view %6221, %6222 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6223, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %6224 = torch.aten.mul.Tensor %6216, %6223 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6224, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_7296 = torch.constant.int -2 %int-1_7297 = torch.constant.int -1 %6225 = torch.aten.transpose.int %270, %int-2_7296, %int-1_7297 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_7298 = torch.constant.int 1 %6226 = torch.aten.size.int %6215, %int1_7298 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_7299 = torch.constant.int 4 %6227 = torch.aten.mul.int %int4_7299, %6226 : !torch.int, !torch.int -> !torch.int %int14336_7300 = torch.constant.int 14336 %6228 = torch.prim.ListConstruct %6227, %int14336_7300 : (!torch.int, !torch.int) -> !torch.list<int> %6229 = torch.aten.view %6224, %6228 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6229, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %6230 = torch.aten.mm %6229, %6225 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6230, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7301 = torch.constant.int 4 %int4096_7302 = torch.constant.int 4096 %6231 = torch.prim.ListConstruct %int4_7301, %6226, %int4096_7302 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6232 = torch.aten.view %6230, %6231 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6232, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_7303 = torch.constant.int 1 %6233 = torch.aten.add.Tensor %6199, %6232, %int1_7303 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6233, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_7304 = torch.constant.int 6 %6234 = torch.prims.convert_element_type 
%6233, %int6_7304 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6234, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_7305 = torch.constant.int 2 %6235 = torch.aten.pow.Tensor_Scalar %6234, %int2_7305 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6235, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_7306 = torch.constant.int -1 %6236 = torch.prim.ListConstruct %int-1_7306 : (!torch.int) -> !torch.list<int> %true_7307 = torch.constant.bool true %none_7308 = torch.constant.none %6237 = torch.aten.mean.dim %6235, %6236, %true_7307, %none_7308 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6237, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_7309 = torch.constant.float 9.9999997473787516E-6 %int1_7310 = torch.constant.int 1 %6238 = torch.aten.add.Scalar %6237, %float9.999990e-06_7309, %int1_7310 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6238, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6239 = torch.aten.rsqrt %6238 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6239, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6240 = torch.aten.mul.Tensor %6234, %6239 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6240, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %6241 = torch.aten.mul.Tensor %271, %6240 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6241, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_7311 = torch.constant.int 5 %6242 = torch.prims.convert_element_type %6241, %int5_7311 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6242, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7312 = torch.constant.int -2 %int-1_7313 = torch.constant.int -1 %6243 = torch.aten.transpose.int %272, %int-2_7312, %int-1_7313 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_7314 = torch.constant.int 4 %6244 = torch.aten.mul.int %int4_7314, %294 : !torch.int, !torch.int -> !torch.int %int4096_7315 = torch.constant.int 4096 %6245 = torch.prim.ListConstruct %6244, %int4096_7315 : (!torch.int, !torch.int) -> !torch.list<int> %6246 = torch.aten.view %6242, %6245 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6246, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6247 = torch.aten.mm %6246, %6243 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6247, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7316 = torch.constant.int 4 %int4096_7317 = torch.constant.int 4096 %6248 = torch.prim.ListConstruct %int4_7316, %294, %int4096_7317 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6249 = torch.aten.view %6247, %6248 : 
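// --- RMSNorm of the residual stream, done in f32 and cast back to f16.
// Sketch of %6234..%6242, assuming eps = 1e-5 (the constant
// 9.9999997473787516E-6 is the f32 rounding of 1e-5):
//   v = x.float()
//   y = (w * (v * torch.rsqrt(v.pow(2).mean(-1, keepdim=True) + eps))).half()
// The normalized result %6242 feeds the attention projections that follow.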
!torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6249, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7318 = torch.constant.int -2 %int-1_7319 = torch.constant.int -1 %6250 = torch.aten.transpose.int %273, %int-2_7318, %int-1_7319 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_7320 = torch.constant.int 4 %6251 = torch.aten.mul.int %int4_7320, %294 : !torch.int, !torch.int -> !torch.int %int4096_7321 = torch.constant.int 4096 %6252 = torch.prim.ListConstruct %6251, %int4096_7321 : (!torch.int, !torch.int) -> !torch.list<int> %6253 = torch.aten.view %6242, %6252 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6253, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6254 = torch.aten.mm %6253, %6250 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %6254, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_7322 = torch.constant.int 4 %int1024_7323 = torch.constant.int 1024 %6255 = torch.prim.ListConstruct %int4_7322, %294, %int1024_7323 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6256 = torch.aten.view %6254, %6255 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %6256, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_7324 = torch.constant.int -2 %int-1_7325 = torch.constant.int -1 %6257 = torch.aten.transpose.int %274, %int-2_7324, %int-1_7325 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_7326 = torch.constant.int 4 %6258 = torch.aten.mul.int %int4_7326, %294 : !torch.int, !torch.int -> !torch.int %int4096_7327 = torch.constant.int 4096 %6259 = torch.prim.ListConstruct %6258, %int4096_7327 : (!torch.int, !torch.int) -> !torch.list<int> %6260 = torch.aten.view %6242, %6259 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6260, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6261 = torch.aten.mm %6260, %6257 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %6261, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_7328 = torch.constant.int 4 %int1024_7329 = torch.constant.int 1024 %6262 = torch.prim.ListConstruct %int4_7328, %294, %int1024_7329 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6263 = torch.aten.view %6261, %6262 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %6263, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_7330 = torch.constant.int 4 %int32_7331 = torch.constant.int 32 %int128_7332 = torch.constant.int 128 %6264 = torch.prim.ListConstruct %int4_7330, %294, %int32_7331, %int128_7332 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6265 = torch.aten.view %6249, %6264 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6265, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_7333 = torch.constant.int 4 %int8_7334 = torch.constant.int 8 
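// --- Attention input projections. Q uses a [4096,4096] weight; K and V use
// [1024,4096] weights, i.e. grouped-query attention with 32 query heads and
// 8 KV heads of dim 128 (a 4:1 ratio). Sketch (Wq/Wk/Wv are my labels):
//   q = (x @ Wq.T).view(4, seq, 32, 128)
//   k = (x @ Wk.T).view(4, seq, 8, 128)   # head reshapes here and just below
//   v = (x @ Wv.T).view(4, seq, 8, 128)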
%int128_7335 = torch.constant.int 128 %6266 = torch.prim.ListConstruct %int4_7333, %294, %int8_7334, %int128_7335 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6267 = torch.aten.view %6256, %6266 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %6267, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_7336 = torch.constant.int 4 %int8_7337 = torch.constant.int 8 %int128_7338 = torch.constant.int 128 %6268 = torch.prim.ListConstruct %int4_7336, %294, %int8_7337, %int128_7338 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6269 = torch.aten.view %6263, %6268 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %6269, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_7339 = torch.constant.int 131072 %none_7340 = torch.constant.none %none_7341 = torch.constant.none %cpu_7342 = torch.constant.device "cpu" %false_7343 = torch.constant.bool false %6270 = torch.aten.arange %int131072_7339, %none_7340, %none_7341, %cpu_7342, %false_7343 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_7344 = torch.constant.int 0 %int128_7345 = torch.constant.int 128 %int2_7346 = torch.constant.int 2 %none_7347 = torch.constant.none %none_7348 = torch.constant.none %cpu_7349 = torch.constant.device "cpu" %false_7350 = torch.constant.bool false %6271 = torch.aten.arange.start_step %int0_7344, %int128_7345, %int2_7346, %none_7347, %none_7348, %cpu_7349, %false_7350 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_7351 = torch.constant.int 0 %int0_7352 = torch.constant.int 0 %int64_7353 = torch.constant.int 64 %int1_7354 = torch.constant.int 1 %6272 = torch.aten.slice.Tensor %6271, %int0_7351, %int0_7352, %int64_7353, %int1_7354 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_7355 = torch.constant.int 6 %6273 = torch.prims.convert_element_type %6272, %int6_7355 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_7356 = torch.constant.int 128 %6274 = torch.aten.div.Scalar %6273, %int128_7356 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_7357 = torch.constant.float 5.000000e+05 %6275 = torch.aten.pow.Scalar %float5.000000e05_7357, %6274 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %6276 = torch.aten.reciprocal %6275 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_7358 = torch.constant.float 1.000000e+00 %6277 = torch.aten.mul.Scalar %6276, %float1.000000e00_7358 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_7359 = torch.constant.int 131072 %int1_7360 = torch.constant.int 1 %6278 = torch.prim.ListConstruct %int131072_7359, %int1_7360 : (!torch.int, !torch.int) -> !torch.list<int> %6279 = torch.aten.view %6270, %6278 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %6280 = torch.aten.mul.Tensor %6279, %6277 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %6281 = torch.aten.cos %6280 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6282 = torch.aten.sin %6280 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6283 
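// --- Rotary-embedding table: the standard RoPE recipe with base 500000 over
// a 131072-position range. Sketch of %6270..%6283:
//   inv_freq = 1.0 / (500000.0 ** (torch.arange(0, 128, 2).float() / 128))
//   angles   = torch.arange(131072).unsqueeze(1) * inv_freq   # [131072, 64]
//   freqs    = torch.polar(torch.ones_like(angles), angles)   # cos + i*sin
// The cos/sin pair above is fused into the complex-valued table %6283 just
// below, then sliced to the live sequence length before use.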
= torch.aten.complex %6281, %6282 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_7361 = torch.constant.int 1 %6284 = torch.aten.size.int %6249, %int1_7361 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_7362 = torch.constant.int 0 %6285 = torch.aten.add.int %int0_7362, %6284 : !torch.int, !torch.int -> !torch.int %int0_7363 = torch.constant.int 0 %int0_7364 = torch.constant.int 0 %int1_7365 = torch.constant.int 1 %6286 = torch.aten.slice.Tensor %6283, %int0_7363, %int0_7364, %6285, %int1_7365 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6286, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_7366 = torch.constant.int 1 %int0_7367 = torch.constant.int 0 %int9223372036854775807_7368 = torch.constant.int 9223372036854775807 %int1_7369 = torch.constant.int 1 %6287 = torch.aten.slice.Tensor %6286, %int1_7366, %int0_7367, %int9223372036854775807_7368, %int1_7369 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6287, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_7370 = torch.constant.int 0 %6288 = torch.aten.unsqueeze %6287, %int0_7370 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %6288, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_7371 = torch.constant.int 2 %6289 = torch.aten.unsqueeze %6288, %int2_7371 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6289, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_7372 = torch.constant.int 3 %int0_7373 = torch.constant.int 0 %int9223372036854775807_7374 = torch.constant.int 9223372036854775807 %int1_7375 = torch.constant.int 1 %6290 = torch.aten.slice.Tensor %6289, %int3_7372, %int0_7373, %int9223372036854775807_7374, %int1_7375 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6290, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %6291 = torch_c.to_builtin_tensor %6265 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_7376 = arith.constant 1 : index %dim_7377 = tensor.dim %6291, %c1_7376 : tensor<4x?x32x128xf16> %6292 = flow.tensor.bitcast %6291 : tensor<4x?x32x128xf16>{%dim_7377} -> tensor<4x?x32x64xcomplex<f16>>{%dim_7377} %6293 = torch_c.from_builtin_tensor %6292 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %6293, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> %6294 = torch.aten.mul.Tensor %6293, %6290 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %6294, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %6295 = torch_c.to_builtin_tensor %6294 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_7378 = arith.constant 1 : index %dim_7379 = tensor.dim %6295, %c1_7378 : 
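// --- RoPE applied to the query states: flow.tensor.bitcast reinterprets
// consecutive f16 pairs of the [4, seq, 32, 128] tensor as complex<f16>,
// multiplies by the broadcast complex table, and bitcasts back. Equivalent
// sketch using view_as_complex in place of the bitcasts:
//   q_c = torch.view_as_complex(q.float().reshape(4, seq, 32, 64, 2))
//   q   = torch.view_as_real(q_c * freqs[None, :seq, None, :]).flatten(-2).half()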
tensor<4x?x32x64xcomplex<f32>> %6296 = flow.tensor.bitcast %6295 : tensor<4x?x32x64xcomplex<f32>>{%dim_7379} -> tensor<4x?x32x128xf32>{%dim_7379} %6297 = torch_c.from_builtin_tensor %6296 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %6297, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_7380 = torch.constant.int 5 %6298 = torch.prims.convert_element_type %6297, %int5_7380 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6298, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_7381 = torch.constant.int 131072 %none_7382 = torch.constant.none %none_7383 = torch.constant.none %cpu_7384 = torch.constant.device "cpu" %false_7385 = torch.constant.bool false %6299 = torch.aten.arange %int131072_7381, %none_7382, %none_7383, %cpu_7384, %false_7385 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_7386 = torch.constant.int 0 %int128_7387 = torch.constant.int 128 %int2_7388 = torch.constant.int 2 %none_7389 = torch.constant.none %none_7390 = torch.constant.none %cpu_7391 = torch.constant.device "cpu" %false_7392 = torch.constant.bool false %6300 = torch.aten.arange.start_step %int0_7386, %int128_7387, %int2_7388, %none_7389, %none_7390, %cpu_7391, %false_7392 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_7393 = torch.constant.int 0 %int0_7394 = torch.constant.int 0 %int64_7395 = torch.constant.int 64 %int1_7396 = torch.constant.int 1 %6301 = torch.aten.slice.Tensor %6300, %int0_7393, %int0_7394, %int64_7395, %int1_7396 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_7397 = torch.constant.int 6 %6302 = torch.prims.convert_element_type %6301, %int6_7397 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_7398 = torch.constant.int 128 %6303 = torch.aten.div.Scalar %6302, %int128_7398 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_7399 = torch.constant.float 5.000000e+05 %6304 = torch.aten.pow.Scalar %float5.000000e05_7399, %6303 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %6305 = torch.aten.reciprocal %6304 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_7400 = torch.constant.float 1.000000e+00 %6306 = torch.aten.mul.Scalar %6305, %float1.000000e00_7400 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_7401 = torch.constant.int 131072 %int1_7402 = torch.constant.int 1 %6307 = torch.prim.ListConstruct %int131072_7401, %int1_7402 : (!torch.int, !torch.int) -> !torch.list<int> %6308 = torch.aten.view %6299, %6307 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %6309 = torch.aten.mul.Tensor %6308, %6306 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %6310 = torch.aten.cos %6309 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6311 = torch.aten.sin %6309 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6312 = torch.aten.complex %6310, %6311 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_7403 = torch.constant.int 1 %6313 = torch.aten.size.int %6256, %int1_7403 : 
!torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_7404 = torch.constant.int 0 %6314 = torch.aten.add.int %int0_7404, %6313 : !torch.int, !torch.int -> !torch.int %int0_7405 = torch.constant.int 0 %int0_7406 = torch.constant.int 0 %int1_7407 = torch.constant.int 1 %6315 = torch.aten.slice.Tensor %6312, %int0_7405, %int0_7406, %6314, %int1_7407 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6315, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_7408 = torch.constant.int 1 %int0_7409 = torch.constant.int 0 %int9223372036854775807_7410 = torch.constant.int 9223372036854775807 %int1_7411 = torch.constant.int 1 %6316 = torch.aten.slice.Tensor %6315, %int1_7408, %int0_7409, %int9223372036854775807_7410, %int1_7411 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6316, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_7412 = torch.constant.int 0 %6317 = torch.aten.unsqueeze %6316, %int0_7412 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %6317, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_7413 = torch.constant.int 2 %6318 = torch.aten.unsqueeze %6317, %int2_7413 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6318, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_7414 = torch.constant.int 3 %int0_7415 = torch.constant.int 0 %int9223372036854775807_7416 = torch.constant.int 9223372036854775807 %int1_7417 = torch.constant.int 1 %6319 = torch.aten.slice.Tensor %6318, %int3_7414, %int0_7415, %int9223372036854775807_7416, %int1_7417 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6319, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %6320 = torch_c.to_builtin_tensor %6267 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_7418 = arith.constant 1 : index %dim_7419 = tensor.dim %6320, %c1_7418 : tensor<4x?x8x128xf16> %6321 = flow.tensor.bitcast %6320 : tensor<4x?x8x128xf16>{%dim_7419} -> tensor<4x?x8x64xcomplex<f16>>{%dim_7419} %6322 = torch_c.from_builtin_tensor %6321 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %6322, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %6323 = torch.aten.mul.Tensor %6322, %6319 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %6323, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f32>> %6324 = torch_c.to_builtin_tensor %6323 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_7420 = arith.constant 1 : index %dim_7421 = tensor.dim %6324, %c1_7420 : tensor<4x?x8x64xcomplex<f32>> %6325 = flow.tensor.bitcast %6324 : tensor<4x?x8x64xcomplex<f32>>{%dim_7421} -> tensor<4x?x8x128xf32>{%dim_7421} %6326 = torch_c.from_builtin_tensor %6325 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape 
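// --- The same rotary transform applied to the 8 key heads ([4, seq, 8, 128]
// bitcast to complex, multiplied, bitcast back). Note the whole frequency
// table was recomputed from scratch for this path rather than reused from
// the query path; deduplicating these identical subgraphs is presumably
// left to later compiler passes.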
%6326, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_7422 = torch.constant.int 5 %6327 = torch.prims.convert_element_type %6326, %int5_7422 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %6327, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_7423 = torch.constant.int 64 %6328 = torch.aten.mul.Scalar %arg2, %int64_7423 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %6328, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int60 = torch.constant.int 60 %int1_7424 = torch.constant.int 1 %6329 = torch.aten.add.Scalar %6328, %int60, %int1_7424 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %6329, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_7425 = torch.constant.int 4 %int32_7426 = torch.constant.int 32 %int8_7427 = torch.constant.int 8 %int128_7428 = torch.constant.int 128 %6330 = torch.prim.ListConstruct %int4_7425, %425, %int32_7426, %int8_7427, %int128_7428 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6331 = torch.aten.view %6327, %6330 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %6331, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_7429 = torch.constant.int 4 %6332 = torch.aten.mul.int %int4_7429, %425 : !torch.int, !torch.int -> !torch.int %int32_7430 = torch.constant.int 32 %int8_7431 = torch.constant.int 8 %int128_7432 = torch.constant.int 128 %6333 = torch.prim.ListConstruct %6332, %int32_7430, %int8_7431, %int128_7432 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6334 = torch.aten.view %6331, %6333 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6334, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_7433 = torch.constant.int 4 %6335 = torch.aten.mul.int %int4_7433, %425 : !torch.int, !torch.int -> !torch.int %6336 = torch.prim.ListConstruct %6335 : (!torch.int) -> !torch.list<int> %6337 = torch.aten.view %6329, %6336 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %6337, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_7434 = torch.constant.int 32 %int2_7435 = torch.constant.int 2 %int32_7436 = torch.constant.int 32 %int8_7437 = torch.constant.int 8 %int128_7438 = torch.constant.int 128 %6338 = torch.prim.ListConstruct %416, %int32_7434, %int2_7435, %int32_7436, %int8_7437, %int128_7438 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6339 = torch.aten.view %6171, %6338 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6339, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_7439 = torch.constant.int 32 %6340 = torch.aten.mul.int %416, %int32_7439 : !torch.int, !torch.int -> !torch.int %int2_7440 = torch.constant.int 2 %6341 = torch.aten.mul.int %6340, %int2_7440 : !torch.int, !torch.int -> !torch.int %int32_7441 = torch.constant.int 32 %int8_7442 = torch.constant.int 8 %int128_7443 = torch.constant.int 128 %6342 = torch.prim.ListConstruct %6341, 
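// --- Paged KV-cache addressing. The flat cache %6171 ([pages, 2097152]) is
// viewed as [pages, 32, 2, 32, 8, 128]: 32 layers x {K,V} x 32 positions per
// page x 8 KV heads x 128 dims (32*2*32*8*128 = 2097152). Slot ids are
// page_id * 64 + 2*layer (+1 for V); the constant 60 is consistent with this
// being layer 30's K plane, though the IR never names the layer explicitly.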
%int32_7441, %int8_7442, %int128_7443 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6343 = torch.aten.view %6339, %6342 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6343, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %6344 = torch.prim.ListConstruct %6337 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_7444 = torch.constant.bool false %6345 = torch.aten.index_put %6343, %6344, %6334, %false_7444 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6345, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_7445 = torch.constant.int 32 %int2_7446 = torch.constant.int 2 %int32_7447 = torch.constant.int 32 %int8_7448 = torch.constant.int 8 %int128_7449 = torch.constant.int 128 %6346 = torch.prim.ListConstruct %416, %int32_7445, %int2_7446, %int32_7447, %int8_7448, %int128_7449 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6347 = torch.aten.view %6345, %6346 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6347, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_7450 = torch.constant.int 2097152 %6348 = torch.prim.ListConstruct %416, %int2097152_7450 : (!torch.int, !torch.int) -> !torch.list<int> %6349 = torch.aten.view %6347, %6348 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %6349, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_7451 = torch.constant.int 32 %int2_7452 = torch.constant.int 2 %int32_7453 = torch.constant.int 32 %int8_7454 = torch.constant.int 8 %int128_7455 = torch.constant.int 128 %6350 = torch.prim.ListConstruct %416, %int32_7451, %int2_7452, %int32_7453, %int8_7454, %int128_7455 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6351 = torch.aten.view %6349, %6350 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6351, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_7456 = torch.constant.int 32 %int8_7457 = torch.constant.int 8 %int128_7458 = torch.constant.int 128 %6352 = torch.prim.ListConstruct %6341, %int32_7456, %int8_7457, %int128_7458 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6353 = torch.aten.view %6351, %6352 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6353, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_7459 = torch.constant.int 4 %int32_7460 = torch.constant.int 32 %int8_7461 = torch.constant.int 8 %int128_7462 = torch.constant.int 128 %6354 = torch.prim.ListConstruct %int4_7459, %425, %int32_7460, %int8_7461, %int128_7462 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6355 = torch.aten.view %6269, %6354 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %6355, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : 
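// --- index_put scatters the rotated K states into the selected rows of the
// flattened cache, which is then viewed back to [pages, 2097152]. Sketch
// (names are mine):
//   cache_flat = cache.view(num_pages * 64, 32, 8, 128)
//   cache_flat[page_ids * 64 + 60] = k_pages   # one [32, 8, 128] page per row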
!torch.vtensor<[4,?,32,8,128],f16> %int4_7463 = torch.constant.int 4 %6356 = torch.aten.mul.int %int4_7463, %425 : !torch.int, !torch.int -> !torch.int %int32_7464 = torch.constant.int 32 %int8_7465 = torch.constant.int 8 %int128_7466 = torch.constant.int 128 %6357 = torch.prim.ListConstruct %6356, %int32_7464, %int8_7465, %int128_7466 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6358 = torch.aten.view %6355, %6357 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6358, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_7467 = torch.constant.int 1 %int1_7468 = torch.constant.int 1 %6359 = torch.aten.add.Scalar %6329, %int1_7467, %int1_7468 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %6359, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_7469 = torch.constant.int 4 %6360 = torch.aten.mul.int %int4_7469, %425 : !torch.int, !torch.int -> !torch.int %6361 = torch.prim.ListConstruct %6360 : (!torch.int) -> !torch.list<int> %6362 = torch.aten.view %6359, %6361 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %6362, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %6363 = torch.prim.ListConstruct %6362 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_7470 = torch.constant.bool false %6364 = torch.aten.index_put %6353, %6363, %6358, %false_7470 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6364, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_7471 = torch.constant.int 32 %int2_7472 = torch.constant.int 2 %int32_7473 = torch.constant.int 32 %int8_7474 = torch.constant.int 8 %int128_7475 = torch.constant.int 128 %6365 = torch.prim.ListConstruct %416, %int32_7471, %int2_7472, %int32_7473, %int8_7474, %int128_7475 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6366 = torch.aten.view %6364, %6365 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6366, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_7476 = torch.constant.int 2097152 %6367 = torch.prim.ListConstruct %416, %int2097152_7476 : (!torch.int, !torch.int) -> !torch.list<int> %6368 = torch.aten.view %6366, %6367 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %6368, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_7477 = torch.constant.int -2 %6369 = torch.aten.unsqueeze %6327, %int-2_7477 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %6369, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_7478 = torch.constant.int 4 %int8_7479 = torch.constant.int 8 %int4_7480 = torch.constant.int 4 %int128_7481 = torch.constant.int 128 %6370 = torch.prim.ListConstruct %int4_7478, %6313, %int8_7479, %int4_7480, %int128_7481 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_7482 = torch.constant.bool false %6371 = torch.aten.expand %6369, %6370, 
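// --- The V states take the same path with slot offset +1 (the add.Scalar
// turning K slots into V slots). With the cache updated, the code returns to
// the in-flight K/V tensors and starts the grouped-query expansion:
// unsqueeze to [4, seq, 8, 1, 128] and expand the singleton axis to 4.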
%false_7482 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6371, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_7483 = torch.constant.int 0 %6372 = torch.aten.clone %6371, %int0_7483 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6372, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_7484 = torch.constant.int 4 %int32_7485 = torch.constant.int 32 %int128_7486 = torch.constant.int 128 %6373 = torch.prim.ListConstruct %int4_7484, %6313, %int32_7485, %int128_7486 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6374 = torch.aten._unsafe_view %6372, %6373 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6374, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_7487 = torch.constant.int -2 %6375 = torch.aten.unsqueeze %6269, %int-2_7487 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %6375, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_7488 = torch.constant.int 1 %6376 = torch.aten.size.int %6263, %int1_7488 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_7489 = torch.constant.int 4 %int8_7490 = torch.constant.int 8 %int4_7491 = torch.constant.int 4 %int128_7492 = torch.constant.int 128 %6377 = torch.prim.ListConstruct %int4_7489, %6376, %int8_7490, %int4_7491, %int128_7492 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_7493 = torch.constant.bool false %6378 = torch.aten.expand %6375, %6377, %false_7493 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6378, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_7494 = torch.constant.int 0 %6379 = torch.aten.clone %6378, %int0_7494 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6379, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_7495 = torch.constant.int 4 %int32_7496 = torch.constant.int 32 %int128_7497 = torch.constant.int 128 %6380 = torch.prim.ListConstruct %int4_7495, %6376, %int32_7496, %int128_7497 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6381 = torch.aten._unsafe_view %6379, %6380 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6381, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_7498 = torch.constant.int 1 %int2_7499 = torch.constant.int 2 %6382 = torch.aten.transpose.int %6298, %int1_7498, %int2_7499 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %6382, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7500 = torch.constant.int 1 %int2_7501 = torch.constant.int 2 %6383 = torch.aten.transpose.int %6374, %int1_7500, %int2_7501 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %6383, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : 
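// --- repeat_kv spelled out in IR: expand + clone + _unsafe_view replicates
// each of the 8 KV heads 4 times so K and V match the 32 query heads. Sketch:
//   k = k.unsqueeze(3).expand(4, seq, 8, 4, 128).reshape(4, seq, 32, 128)
// Q, K and V are then transposed to [4, 32, seq, 128] for attention.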
!torch.vtensor<[4,32,?,128],f16> %int1_7502 = torch.constant.int 1 %int2_7503 = torch.constant.int 2 %6384 = torch.aten.transpose.int %6381, %int1_7502, %int2_7503 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %6384, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_7504 = torch.constant.float 0.000000e+00 %false_7505 = torch.constant.bool false %none_7506 = torch.constant.none %6385:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%6382, %6383, %6384, %float0.000000e00_7504, %false_7505, %320, %none_7506) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %6385#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7507 = torch.constant.int 1 %int2_7508 = torch.constant.int 2 %6386 = torch.aten.transpose.int %6385#0, %int1_7507, %int2_7508 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6386, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_7509 = torch.constant.int 4 %int4096_7510 = torch.constant.int 4096 %6387 = torch.prim.ListConstruct %int4_7509, %6284, %int4096_7510 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6388 = torch.aten.view %6386, %6387 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6388, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7511 = torch.constant.int -2 %int-1_7512 = torch.constant.int -1 %6389 = torch.aten.transpose.int %275, %int-2_7511, %int-1_7512 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_7513 = torch.constant.int 4 %6390 = torch.aten.mul.int %int4_7513, %6284 : !torch.int, !torch.int -> !torch.int %int4096_7514 = torch.constant.int 4096 %6391 = torch.prim.ListConstruct %6390, %int4096_7514 : (!torch.int, !torch.int) -> !torch.list<int> %6392 = torch.aten.view %6388, %6391 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6392, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6393 = torch.aten.mm %6392, %6389 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6393, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7515 = torch.constant.int 4 %int4096_7516 = torch.constant.int 4096 %6394 = torch.prim.ListConstruct %int4_7515, %6284, %int4096_7516 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6395 = torch.aten.view %6393, %6394 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6395, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_7517 = torch.constant.int 1 %6396 = torch.aten.add.Tensor %6233, %6395, %int1_7517 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6396, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_7518 = 
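// --- Attention core. The custom op
// "torch.aten._scaled_dot_product_flash_attention_for_cpu" takes the
// [4, 32, seq, 128] Q/K/V, dropout 0.0, is_causal = false, and the explicit
// [4, 1, seq, seq] f16 mask %320, so any causal structure comes in through
// the mask. Sketch: out = F.scaled_dot_product_attention(q, k, v, attn_mask=m)
// The result is transposed back, flattened to [4, seq, 4096], pushed through
// the [4096, 4096] output projection, and added to the residual stream.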
torch.constant.int 6 %6397 = torch.prims.convert_element_type %6396, %int6_7518 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6397, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_7519 = torch.constant.int 2 %6398 = torch.aten.pow.Tensor_Scalar %6397, %int2_7519 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6398, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_7520 = torch.constant.int -1 %6399 = torch.prim.ListConstruct %int-1_7520 : (!torch.int) -> !torch.list<int> %true_7521 = torch.constant.bool true %none_7522 = torch.constant.none %6400 = torch.aten.mean.dim %6398, %6399, %true_7521, %none_7522 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6400, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_7523 = torch.constant.float 9.9999997473787516E-6 %int1_7524 = torch.constant.int 1 %6401 = torch.aten.add.Scalar %6400, %float9.999990e-06_7523, %int1_7524 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6401, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6402 = torch.aten.rsqrt %6401 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6402, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6403 = torch.aten.mul.Tensor %6397, %6402 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6403, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %6404 = torch.aten.mul.Tensor %276, %6403 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6404, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_7525 = torch.constant.int 5 %6405 = torch.prims.convert_element_type %6404, %int5_7525 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6405, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7526 = torch.constant.int -2 %int-1_7527 = torch.constant.int -1 %6406 = torch.aten.transpose.int %277, %int-2_7526, %int-1_7527 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_7528 = torch.constant.int 4 %6407 = torch.aten.mul.int %int4_7528, %294 : !torch.int, !torch.int -> !torch.int %int4096_7529 = torch.constant.int 4096 %6408 = torch.prim.ListConstruct %6407, %int4096_7529 : (!torch.int, !torch.int) -> !torch.list<int> %6409 = torch.aten.view %6405, %6408 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6409, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6410 = torch.aten.mm %6409, %6406 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6410, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_7530 = torch.constant.int 4 %int14336_7531 = torch.constant.int 14336 %6411 = torch.prim.ListConstruct %int4_7530, %294, %int14336_7531 : (!torch.int, !torch.int, !torch.int) -> 
!torch.list<int> %6412 = torch.aten.view %6410, %6411 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6412, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %6413 = torch.aten.silu %6412 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6413, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_7532 = torch.constant.int -2 %int-1_7533 = torch.constant.int -1 %6414 = torch.aten.transpose.int %278, %int-2_7532, %int-1_7533 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_7534 = torch.constant.int 4 %6415 = torch.aten.mul.int %int4_7534, %294 : !torch.int, !torch.int -> !torch.int %int4096_7535 = torch.constant.int 4096 %6416 = torch.prim.ListConstruct %6415, %int4096_7535 : (!torch.int, !torch.int) -> !torch.list<int> %6417 = torch.aten.view %6405, %6416 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6417, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6418 = torch.aten.mm %6417, %6414 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6418, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_7536 = torch.constant.int 4 %int14336_7537 = torch.constant.int 14336 %6419 = torch.prim.ListConstruct %int4_7536, %294, %int14336_7537 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6420 = torch.aten.view %6418, %6419 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6420, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %6421 = torch.aten.mul.Tensor %6413, %6420 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6421, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_7538 = torch.constant.int -2 %int-1_7539 = torch.constant.int -1 %6422 = torch.aten.transpose.int %279, %int-2_7538, %int-1_7539 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_7540 = torch.constant.int 1 %6423 = torch.aten.size.int %6412, %int1_7540 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_7541 = torch.constant.int 4 %6424 = torch.aten.mul.int %int4_7541, %6423 : !torch.int, !torch.int -> !torch.int %int14336_7542 = torch.constant.int 14336 %6425 = torch.prim.ListConstruct %6424, %int14336_7542 : (!torch.int, !torch.int) -> !torch.list<int> %6426 = torch.aten.view %6421, %6425 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6426, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %6427 = torch.aten.mm %6426, %6422 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6427, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7543 = torch.constant.int 4 %int4096_7544 = torch.constant.int 4096 %6428 = torch.prim.ListConstruct %int4_7543, %6423, %int4096_7544 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6429 = torch.aten.view %6427, %6428 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> 
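// --- The next SwiGLU feed-forward, this time visible end to end:
//   gate = silu(x @ W_gate.T)        # %6413
//   up   = x @ W_up.T                # %6420
//   out  = (gate * up) @ W_down.T    # %6421, then the mm below
// Both 14336-wide projections read the same normalized activation %6405;
// W_gate/W_up/W_down are my labels for the transposed %277/%278/%279.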
!torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6429, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_7545 = torch.constant.int 1 %6430 = torch.aten.add.Tensor %6396, %6429, %int1_7545 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6430, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_7546 = torch.constant.int 6 %6431 = torch.prims.convert_element_type %6430, %int6_7546 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6431, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_7547 = torch.constant.int 2 %6432 = torch.aten.pow.Tensor_Scalar %6431, %int2_7547 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6432, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_7548 = torch.constant.int -1 %6433 = torch.prim.ListConstruct %int-1_7548 : (!torch.int) -> !torch.list<int> %true_7549 = torch.constant.bool true %none_7550 = torch.constant.none %6434 = torch.aten.mean.dim %6432, %6433, %true_7549, %none_7550 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6434, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_7551 = torch.constant.float 9.9999997473787516E-6 %int1_7552 = torch.constant.int 1 %6435 = torch.aten.add.Scalar %6434, %float9.999990e-06_7551, %int1_7552 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6435, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6436 = torch.aten.rsqrt %6435 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6436, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6437 = torch.aten.mul.Tensor %6431, %6436 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6437, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %6438 = torch.aten.mul.Tensor %280, %6437 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6438, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_7553 = torch.constant.int 5 %6439 = torch.prims.convert_element_type %6438, %int5_7553 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6439, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7554 = torch.constant.int -2 %int-1_7555 = torch.constant.int -1 %6440 = torch.aten.transpose.int %281, %int-2_7554, %int-1_7555 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_7556 = torch.constant.int 4 %6441 = torch.aten.mul.int %int4_7556, %294 : !torch.int, !torch.int -> !torch.int %int4096_7557 = torch.constant.int 4096 %6442 = torch.prim.ListConstruct %6441, %int4096_7557 : (!torch.int, !torch.int) -> !torch.list<int> %6443 = torch.aten.view %6439, %6442 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6443, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : 
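// --- Residual add (%6430), then the following block's input RMSNorm: the
// same f32 pow/mean/rsqrt pattern with eps = 1e-5 and per-channel weight
// %280, cast back to f16 (%6439) before the attention projections below.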
!torch.vtensor<[?,4096],f16> %6444 = torch.aten.mm %6443, %6440 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6444, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7558 = torch.constant.int 4 %int4096_7559 = torch.constant.int 4096 %6445 = torch.prim.ListConstruct %int4_7558, %294, %int4096_7559 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6446 = torch.aten.view %6444, %6445 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6446, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7560 = torch.constant.int -2 %int-1_7561 = torch.constant.int -1 %6447 = torch.aten.transpose.int %282, %int-2_7560, %int-1_7561 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_7562 = torch.constant.int 4 %6448 = torch.aten.mul.int %int4_7562, %294 : !torch.int, !torch.int -> !torch.int %int4096_7563 = torch.constant.int 4096 %6449 = torch.prim.ListConstruct %6448, %int4096_7563 : (!torch.int, !torch.int) -> !torch.list<int> %6450 = torch.aten.view %6439, %6449 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6450, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6451 = torch.aten.mm %6450, %6447 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %6451, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_7564 = torch.constant.int 4 %int1024_7565 = torch.constant.int 1024 %6452 = torch.prim.ListConstruct %int4_7564, %294, %int1024_7565 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6453 = torch.aten.view %6451, %6452 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %6453, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int-2_7566 = torch.constant.int -2 %int-1_7567 = torch.constant.int -1 %6454 = torch.aten.transpose.int %283, %int-2_7566, %int-1_7567 : !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f16> %int4_7568 = torch.constant.int 4 %6455 = torch.aten.mul.int %int4_7568, %294 : !torch.int, !torch.int -> !torch.int %int4096_7569 = torch.constant.int 4096 %6456 = torch.prim.ListConstruct %6455, %int4096_7569 : (!torch.int, !torch.int) -> !torch.list<int> %6457 = torch.aten.view %6439, %6456 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6457, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6458 = torch.aten.mm %6457, %6454 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[?,1024],f16> torch.bind_symbolic_shape %6458, [%292], affine_map<()[s0] -> (s0 * 128, 1024)> : !torch.vtensor<[?,1024],f16> %int4_7570 = torch.constant.int 4 %int1024_7571 = torch.constant.int 1024 %6459 = torch.prim.ListConstruct %int4_7570, %294, %int1024_7571 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6460 = torch.aten.view %6458, %6459 : !torch.vtensor<[?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1024],f16> torch.bind_symbolic_shape %6460, [%292], affine_map<()[s0] -> (4, s0 * 32, 1024)> : !torch.vtensor<[4,?,1024],f16> %int4_7572 = torch.constant.int 4 %int32_7573 = 
torch.constant.int 32 %int128_7574 = torch.constant.int 128 %6461 = torch.prim.ListConstruct %int4_7572, %294, %int32_7573, %int128_7574 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6462 = torch.aten.view %6446, %6461 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6462, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_7575 = torch.constant.int 4 %int8_7576 = torch.constant.int 8 %int128_7577 = torch.constant.int 128 %6463 = torch.prim.ListConstruct %int4_7575, %294, %int8_7576, %int128_7577 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6464 = torch.aten.view %6453, %6463 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %6464, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int4_7578 = torch.constant.int 4 %int8_7579 = torch.constant.int 8 %int128_7580 = torch.constant.int 128 %6465 = torch.prim.ListConstruct %int4_7578, %294, %int8_7579, %int128_7580 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6466 = torch.aten.view %6460, %6465 : !torch.vtensor<[4,?,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %6466, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int131072_7581 = torch.constant.int 131072 %none_7582 = torch.constant.none %none_7583 = torch.constant.none %cpu_7584 = torch.constant.device "cpu" %false_7585 = torch.constant.bool false %6467 = torch.aten.arange %int131072_7581, %none_7582, %none_7583, %cpu_7584, %false_7585 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_7586 = torch.constant.int 0 %int128_7587 = torch.constant.int 128 %int2_7588 = torch.constant.int 2 %none_7589 = torch.constant.none %none_7590 = torch.constant.none %cpu_7591 = torch.constant.device "cpu" %false_7592 = torch.constant.bool false %6468 = torch.aten.arange.start_step %int0_7586, %int128_7587, %int2_7588, %none_7589, %none_7590, %cpu_7591, %false_7592 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_7593 = torch.constant.int 0 %int0_7594 = torch.constant.int 0 %int64_7595 = torch.constant.int 64 %int1_7596 = torch.constant.int 1 %6469 = torch.aten.slice.Tensor %6468, %int0_7593, %int0_7594, %int64_7595, %int1_7596 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_7597 = torch.constant.int 6 %6470 = torch.prims.convert_element_type %6469, %int6_7597 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_7598 = torch.constant.int 128 %6471 = torch.aten.div.Scalar %6470, %int128_7598 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_7599 = torch.constant.float 5.000000e+05 %6472 = torch.aten.pow.Scalar %float5.000000e05_7599, %6471 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %6473 = torch.aten.reciprocal %6472 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_7600 = torch.constant.float 1.000000e+00 %6474 = torch.aten.mul.Scalar %6473, %float1.000000e00_7600 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_7601 = torch.constant.int 131072 %int1_7602 = torch.constant.int 1 %6475 = torch.prim.ListConstruct %int131072_7601, 
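// --- Same attention front-end for the next layer: Q/K/V projected and split
// into 32 query and 8 KV heads of dim 128, followed by yet another
// from-scratch rebuild of the 131072-entry RoPE table (arange,
// 500000 ** (i/128), reciprocal, cos/sin). Nothing layer-specific enters the
// table, so these repeated subgraphs are natural CSE/hoisting candidates.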
%int1_7602 : (!torch.int, !torch.int) -> !torch.list<int> %6476 = torch.aten.view %6467, %6475 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %6477 = torch.aten.mul.Tensor %6476, %6474 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32> %6478 = torch.aten.cos %6477 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6479 = torch.aten.sin %6477 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6480 = torch.aten.complex %6478, %6479 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_7603 = torch.constant.int 1 %6481 = torch.aten.size.int %6446, %int1_7603 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int %int0_7604 = torch.constant.int 0 %6482 = torch.aten.add.int %int0_7604, %6481 : !torch.int, !torch.int -> !torch.int %int0_7605 = torch.constant.int 0 %int0_7606 = torch.constant.int 0 %int1_7607 = torch.constant.int 1 %6483 = torch.aten.slice.Tensor %6480, %int0_7605, %int0_7606, %6482, %int1_7607 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6483, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_7608 = torch.constant.int 1 %int0_7609 = torch.constant.int 0 %int9223372036854775807_7610 = torch.constant.int 9223372036854775807 %int1_7611 = torch.constant.int 1 %6484 = torch.aten.slice.Tensor %6483, %int1_7608, %int0_7609, %int9223372036854775807_7610, %int1_7611 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6484, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_7612 = torch.constant.int 0 %6485 = torch.aten.unsqueeze %6484, %int0_7612 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %6485, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_7613 = torch.constant.int 2 %6486 = torch.aten.unsqueeze %6485, %int2_7613 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6486, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_7614 = torch.constant.int 3 %int0_7615 = torch.constant.int 0 %int9223372036854775807_7616 = torch.constant.int 9223372036854775807 %int1_7617 = torch.constant.int 1 %6487 = torch.aten.slice.Tensor %6486, %int3_7614, %int0_7615, %int9223372036854775807_7616, %int1_7617 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6487, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %6488 = torch_c.to_builtin_tensor %6462 : !torch.vtensor<[4,?,32,128],f16> -> tensor<4x?x32x128xf16> %c1_7618 = arith.constant 1 : index %dim_7619 = tensor.dim %6488, %c1_7618 : tensor<4x?x32x128xf16> %6489 = flow.tensor.bitcast %6488 : tensor<4x?x32x128xf16>{%dim_7619} -> tensor<4x?x32x64xcomplex<f16>>{%dim_7619} %6490 = torch_c.from_builtin_tensor %6489 : tensor<4x?x32x64xcomplex<f16>> -> !torch.vtensor<[4,?,32,64],complex<f16>> torch.bind_symbolic_shape %6490, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f16>> 
%6491 = torch.aten.mul.Tensor %6490, %6487 : !torch.vtensor<[4,?,32,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,32,64],complex<f32>> torch.bind_symbolic_shape %6491, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 64)> : !torch.vtensor<[4,?,32,64],complex<f32>> %6492 = torch_c.to_builtin_tensor %6491 : !torch.vtensor<[4,?,32,64],complex<f32>> -> tensor<4x?x32x64xcomplex<f32>> %c1_7620 = arith.constant 1 : index %dim_7621 = tensor.dim %6492, %c1_7620 : tensor<4x?x32x64xcomplex<f32>> %6493 = flow.tensor.bitcast %6492 : tensor<4x?x32x64xcomplex<f32>>{%dim_7621} -> tensor<4x?x32x128xf32>{%dim_7621} %6494 = torch_c.from_builtin_tensor %6493 : tensor<4x?x32x128xf32> -> !torch.vtensor<[4,?,32,128],f32> torch.bind_symbolic_shape %6494, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f32> %int5_7622 = torch.constant.int 5 %6495 = torch.prims.convert_element_type %6494, %int5_7622 : !torch.vtensor<[4,?,32,128],f32>, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6495, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int131072_7623 = torch.constant.int 131072 %none_7624 = torch.constant.none %none_7625 = torch.constant.none %cpu_7626 = torch.constant.device "cpu" %false_7627 = torch.constant.bool false %6496 = torch.aten.arange %int131072_7623, %none_7624, %none_7625, %cpu_7626, %false_7627 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> %int0_7628 = torch.constant.int 0 %int128_7629 = torch.constant.int 128 %int2_7630 = torch.constant.int 2 %none_7631 = torch.constant.none %none_7632 = torch.constant.none %cpu_7633 = torch.constant.device "cpu" %false_7634 = torch.constant.bool false %6497 = torch.aten.arange.start_step %int0_7628, %int128_7629, %int2_7630, %none_7631, %none_7632, %cpu_7633, %false_7634 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> %int0_7635 = torch.constant.int 0 %int0_7636 = torch.constant.int 0 %int64_7637 = torch.constant.int 64 %int1_7638 = torch.constant.int 1 %6498 = torch.aten.slice.Tensor %6497, %int0_7635, %int0_7636, %int64_7637, %int1_7638 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64> %int6_7639 = torch.constant.int 6 %6499 = torch.prims.convert_element_type %6498, %int6_7639 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> %int128_7640 = torch.constant.int 128 %6500 = torch.aten.div.Scalar %6499, %int128_7640 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> %float5.000000e05_7641 = torch.constant.float 5.000000e+05 %6501 = torch.aten.pow.Scalar %float5.000000e05_7641, %6500 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %6502 = torch.aten.reciprocal %6501 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> %float1.000000e00_7642 = torch.constant.float 1.000000e+00 %6503 = torch.aten.mul.Scalar %6502, %float1.000000e00_7642 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> %int131072_7643 = torch.constant.int 131072 %int1_7644 = torch.constant.int 1 %6504 = torch.prim.ListConstruct %int131072_7643, %int1_7644 : (!torch.int, !torch.int) -> !torch.list<int> %6505 = torch.aten.view %6496, %6504 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64> %6506 = torch.aten.mul.Tensor %6505, %6503 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> 
-> !torch.vtensor<[131072,64],f32> %6507 = torch.aten.cos %6506 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6508 = torch.aten.sin %6506 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32> %6509 = torch.aten.complex %6507, %6508 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>> %int1_7645 = torch.constant.int 1 %6510 = torch.aten.size.int %6453, %int1_7645 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int0_7646 = torch.constant.int 0 %6511 = torch.aten.add.int %int0_7646, %6510 : !torch.int, !torch.int -> !torch.int %int0_7647 = torch.constant.int 0 %int0_7648 = torch.constant.int 0 %int1_7649 = torch.constant.int 1 %6512 = torch.aten.slice.Tensor %6509, %int0_7647, %int0_7648, %6511, %int1_7649 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6512, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int1_7650 = torch.constant.int 1 %int0_7651 = torch.constant.int 0 %int9223372036854775807_7652 = torch.constant.int 9223372036854775807 %int1_7653 = torch.constant.int 1 %6513 = torch.aten.slice.Tensor %6512, %int1_7650, %int0_7651, %int9223372036854775807_7652, %int1_7653 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>> torch.bind_symbolic_shape %6513, [%292], affine_map<()[s0] -> (s0 * 32, 64)> : !torch.vtensor<[?,64],complex<f32>> %int0_7654 = torch.constant.int 0 %6514 = torch.aten.unsqueeze %6513, %int0_7654 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>> torch.bind_symbolic_shape %6514, [%292], affine_map<()[s0] -> (1, s0 * 32, 64)> : !torch.vtensor<[1,?,64],complex<f32>> %int2_7655 = torch.constant.int 2 %6515 = torch.aten.unsqueeze %6514, %int2_7655 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6515, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %int3_7656 = torch.constant.int 3 %int0_7657 = torch.constant.int 0 %int9223372036854775807_7658 = torch.constant.int 9223372036854775807 %int1_7659 = torch.constant.int 1 %6516 = torch.aten.slice.Tensor %6515, %int3_7656, %int0_7657, %int9223372036854775807_7658, %int1_7659 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>> torch.bind_symbolic_shape %6516, [%292], affine_map<()[s0] -> (1, s0 * 32, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>> %6517 = torch_c.to_builtin_tensor %6464 : !torch.vtensor<[4,?,8,128],f16> -> tensor<4x?x8x128xf16> %c1_7660 = arith.constant 1 : index %dim_7661 = tensor.dim %6517, %c1_7660 : tensor<4x?x8x128xf16> %6518 = flow.tensor.bitcast %6517 : tensor<4x?x8x128xf16>{%dim_7661} -> tensor<4x?x8x64xcomplex<f16>>{%dim_7661} %6519 = torch_c.from_builtin_tensor %6518 : tensor<4x?x8x64xcomplex<f16>> -> !torch.vtensor<[4,?,8,64],complex<f16>> torch.bind_symbolic_shape %6519, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : !torch.vtensor<[4,?,8,64],complex<f16>> %6520 = torch.aten.mul.Tensor %6519, %6516 : !torch.vtensor<[4,?,8,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,8,64],complex<f32>> torch.bind_symbolic_shape %6520, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 64)> : 
!torch.vtensor<[4,?,8,64],complex<f32>> %6521 = torch_c.to_builtin_tensor %6520 : !torch.vtensor<[4,?,8,64],complex<f32>> -> tensor<4x?x8x64xcomplex<f32>> %c1_7662 = arith.constant 1 : index %dim_7663 = tensor.dim %6521, %c1_7662 : tensor<4x?x8x64xcomplex<f32>> %6522 = flow.tensor.bitcast %6521 : tensor<4x?x8x64xcomplex<f32>>{%dim_7663} -> tensor<4x?x8x128xf32>{%dim_7663} %6523 = torch_c.from_builtin_tensor %6522 : tensor<4x?x8x128xf32> -> !torch.vtensor<[4,?,8,128],f32> torch.bind_symbolic_shape %6523, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f32> %int5_7664 = torch.constant.int 5 %6524 = torch.prims.convert_element_type %6523, %int5_7664 : !torch.vtensor<[4,?,8,128],f32>, !torch.int -> !torch.vtensor<[4,?,8,128],f16> torch.bind_symbolic_shape %6524, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 128)> : !torch.vtensor<[4,?,8,128],f16> %int64_7665 = torch.constant.int 64 %6525 = torch.aten.mul.Scalar %arg2, %int64_7665 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %6525, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int62 = torch.constant.int 62 %int1_7666 = torch.constant.int 1 %6526 = torch.aten.add.Scalar %6525, %int62, %int1_7666 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %6526, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_7667 = torch.constant.int 4 %int32_7668 = torch.constant.int 32 %int8_7669 = torch.constant.int 8 %int128_7670 = torch.constant.int 128 %6527 = torch.prim.ListConstruct %int4_7667, %425, %int32_7668, %int8_7669, %int128_7670 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6528 = torch.aten.view %6524, %6527 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %6528, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_7671 = torch.constant.int 4 %6529 = torch.aten.mul.int %int4_7671, %425 : !torch.int, !torch.int -> !torch.int %int32_7672 = torch.constant.int 32 %int8_7673 = torch.constant.int 8 %int128_7674 = torch.constant.int 128 %6530 = torch.prim.ListConstruct %6529, %int32_7672, %int8_7673, %int128_7674 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6531 = torch.aten.view %6528, %6530 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6531, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_7675 = torch.constant.int 4 %6532 = torch.aten.mul.int %int4_7675, %425 : !torch.int, !torch.int -> !torch.int %6533 = torch.prim.ListConstruct %6532 : (!torch.int) -> !torch.list<int> %6534 = torch.aten.view %6526, %6533 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %6534, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %int32_7676 = torch.constant.int 32 %int2_7677 = torch.constant.int 2 %int32_7678 = torch.constant.int 32 %int8_7679 = torch.constant.int 8 %int128_7680 = torch.constant.int 128 %6535 = torch.prim.ListConstruct %416, %int32_7676, %int2_7677, %int32_7678, %int8_7679, %int128_7680 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6536 = torch.aten.view %6368, %6535 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> 
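// Paged KV cache update. %6368 is the flat cache [pages, 2097152]f16; each
// page factors as 32 layers x 2 (K/V) x 32 tokens x 8 KV heads x 128 dims
// (32 * 2 * 32 * 8 * 128 = 2097152), which is what the view just above
// recovers. %arg2 appears to be the page table: the destination rows are
// arg2 * 64 + 62, i.e. the K slot of layer 31 (31 * 2 + 0), consistent with
// this being the last transformer block before the output norm below.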
torch.bind_symbolic_shape %6536, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_7681 = torch.constant.int 32 %6537 = torch.aten.mul.int %416, %int32_7681 : !torch.int, !torch.int -> !torch.int %int2_7682 = torch.constant.int 2 %6538 = torch.aten.mul.int %6537, %int2_7682 : !torch.int, !torch.int -> !torch.int %int32_7683 = torch.constant.int 32 %int8_7684 = torch.constant.int 8 %int128_7685 = torch.constant.int 128 %6539 = torch.prim.ListConstruct %6538, %int32_7683, %int8_7684, %int128_7685 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6540 = torch.aten.view %6536, %6539 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6540, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %6541 = torch.prim.ListConstruct %6534 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_7686 = torch.constant.bool false %6542 = torch.aten.index_put %6540, %6541, %6531, %false_7686 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6542, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_7687 = torch.constant.int 32 %int2_7688 = torch.constant.int 2 %int32_7689 = torch.constant.int 32 %int8_7690 = torch.constant.int 8 %int128_7691 = torch.constant.int 128 %6543 = torch.prim.ListConstruct %416, %int32_7687, %int2_7688, %int32_7689, %int8_7690, %int128_7691 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6544 = torch.aten.view %6542, %6543 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6544, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_7692 = torch.constant.int 2097152 %6545 = torch.prim.ListConstruct %416, %int2097152_7692 : (!torch.int, !torch.int) -> !torch.list<int> %6546 = torch.aten.view %6544, %6545 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.bind_symbolic_shape %6546, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int32_7693 = torch.constant.int 32 %int2_7694 = torch.constant.int 2 %int32_7695 = torch.constant.int 32 %int8_7696 = torch.constant.int 8 %int128_7697 = torch.constant.int 128 %6547 = torch.prim.ListConstruct %416, %int32_7693, %int2_7694, %int32_7695, %int8_7696, %int128_7697 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6548 = torch.aten.view %6546, %6547 : !torch.vtensor<[?,2097152],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6548, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int32_7698 = torch.constant.int 32 %int8_7699 = torch.constant.int 8 %int128_7700 = torch.constant.int 128 %6549 = torch.prim.ListConstruct %6538, %int32_7698, %int8_7699, %int128_7700 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6550 = torch.aten.view %6548, %6549 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6550, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int4_7701 = torch.constant.int 4 
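// The index_put above scattered the rotated K pages into the cache; %6550 is
// the same buffer collapsed back to [rows, 32, 8, 128] so that the ops below
// can write V (%6466, no RoPE applied) at row offset +1 -- presumably the
// layer-31 V slot -- after which torch.overwrite.tensor.contents publishes
// the mutated cache into %arg3 in place. K and V are then expanded from
// 8 KV heads to 32 query heads (4-way repeat, grouped-query attention) and
// fed to the fused _scaled_dot_product_flash_attention_for_cpu op with the
// [4, 1, seq, seq] mask %320.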
%int32_7702 = torch.constant.int 32 %int8_7703 = torch.constant.int 8 %int128_7704 = torch.constant.int 128 %6551 = torch.prim.ListConstruct %int4_7701, %425, %int32_7702, %int8_7703, %int128_7704 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6552 = torch.aten.view %6466, %6551 : !torch.vtensor<[4,?,8,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,8,128],f16> torch.bind_symbolic_shape %6552, [%292], affine_map<()[s0] -> (4, s0, 32, 8, 128)> : !torch.vtensor<[4,?,32,8,128],f16> %int4_7705 = torch.constant.int 4 %6553 = torch.aten.mul.int %int4_7705, %425 : !torch.int, !torch.int -> !torch.int %int32_7706 = torch.constant.int 32 %int8_7707 = torch.constant.int 8 %int128_7708 = torch.constant.int 128 %6554 = torch.prim.ListConstruct %6553, %int32_7706, %int8_7707, %int128_7708 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6555 = torch.aten.view %6552, %6554 : !torch.vtensor<[4,?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6555, [%292], affine_map<()[s0] -> (s0 * 4, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int1_7709 = torch.constant.int 1 %int1_7710 = torch.constant.int 1 %6556 = torch.aten.add.Scalar %6526, %int1_7709, %int1_7710 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64> torch.bind_symbolic_shape %6556, [%292], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64> %int4_7711 = torch.constant.int 4 %6557 = torch.aten.mul.int %int4_7711, %425 : !torch.int, !torch.int -> !torch.int %6558 = torch.prim.ListConstruct %6557 : (!torch.int) -> !torch.list<int> %6559 = torch.aten.view %6556, %6558 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> torch.bind_symbolic_shape %6559, [%292], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64> %6560 = torch.prim.ListConstruct %6559 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> %false_7712 = torch.constant.bool false %6561 = torch.aten.index_put %6550, %6560, %6555, %false_7712 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f16>, !torch.bool -> !torch.vtensor<[?,32,8,128],f16> torch.bind_symbolic_shape %6561, [%293], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f16> %int32_7713 = torch.constant.int 32 %int2_7714 = torch.constant.int 2 %int32_7715 = torch.constant.int 32 %int8_7716 = torch.constant.int 8 %int128_7717 = torch.constant.int 128 %6562 = torch.prim.ListConstruct %416, %int32_7713, %int2_7714, %int32_7715, %int8_7716, %int128_7717 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6563 = torch.aten.view %6561, %6562 : !torch.vtensor<[?,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f16> torch.bind_symbolic_shape %6563, [%293], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f16> %int2097152_7718 = torch.constant.int 2097152 %6564 = torch.prim.ListConstruct %416, %int2097152_7718 : (!torch.int, !torch.int) -> !torch.list<int> %6565 = torch.aten.view %6563, %6564 : !torch.vtensor<[?,32,2,32,8,128],f16>, !torch.list<int> -> !torch.vtensor<[?,2097152],f16> torch.overwrite.tensor.contents %6565 overwrites %arg3 : !torch.vtensor<[?,2097152],f16>, !torch.tensor<[?,2097152],f16> torch.bind_symbolic_shape %6565, [%293], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f16> %int-2_7719 = torch.constant.int -2 %6566 = torch.aten.unsqueeze %6524, 
%int-2_7719 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %6566, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int4_7720 = torch.constant.int 4 %int8_7721 = torch.constant.int 8 %int4_7722 = torch.constant.int 4 %int128_7723 = torch.constant.int 128 %6567 = torch.prim.ListConstruct %int4_7720, %6510, %int8_7721, %int4_7722, %int128_7723 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_7724 = torch.constant.bool false %6568 = torch.aten.expand %6566, %6567, %false_7724 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6568, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_7725 = torch.constant.int 0 %6569 = torch.aten.clone %6568, %int0_7725 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6569, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_7726 = torch.constant.int 4 %int32_7727 = torch.constant.int 32 %int128_7728 = torch.constant.int 128 %6570 = torch.prim.ListConstruct %int4_7726, %6510, %int32_7727, %int128_7728 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6571 = torch.aten._unsafe_view %6569, %6570 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6571, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int-2_7729 = torch.constant.int -2 %6572 = torch.aten.unsqueeze %6466, %int-2_7729 : !torch.vtensor<[4,?,8,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,1,128],f16> torch.bind_symbolic_shape %6572, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)> : !torch.vtensor<[4,?,8,1,128],f16> %int1_7730 = torch.constant.int 1 %6573 = torch.aten.size.int %6460, %int1_7730 : !torch.vtensor<[4,?,1024],f16>, !torch.int -> !torch.int %int4_7731 = torch.constant.int 4 %int8_7732 = torch.constant.int 8 %int4_7733 = torch.constant.int 4 %int128_7734 = torch.constant.int 128 %6574 = torch.prim.ListConstruct %int4_7731, %6573, %int8_7732, %int4_7733, %int128_7734 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %false_7735 = torch.constant.bool false %6575 = torch.aten.expand %6572, %6574, %false_7735 : !torch.vtensor<[4,?,8,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6575, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int0_7736 = torch.constant.int 0 %6576 = torch.aten.clone %6575, %int0_7736 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.int -> !torch.vtensor<[4,?,8,4,128],f16> torch.bind_symbolic_shape %6576, [%292], affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)> : !torch.vtensor<[4,?,8,4,128],f16> %int4_7737 = torch.constant.int 4 %int32_7738 = torch.constant.int 32 %int128_7739 = torch.constant.int 128 %6577 = torch.prim.ListConstruct %int4_7737, %6573, %int32_7738, %int128_7739 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> %6578 = torch.aten._unsafe_view %6576, %6577 : !torch.vtensor<[4,?,8,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6578, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int1_7740 = torch.constant.int 1 %int2_7741 = 
torch.constant.int 2 %6579 = torch.aten.transpose.int %6495, %int1_7740, %int2_7741 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %6579, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7742 = torch.constant.int 1 %int2_7743 = torch.constant.int 2 %6580 = torch.aten.transpose.int %6571, %int1_7742, %int2_7743 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %6580, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7744 = torch.constant.int 1 %int2_7745 = torch.constant.int 2 %6581 = torch.aten.transpose.int %6578, %int1_7744, %int2_7745 : !torch.vtensor<[4,?,32,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,32,?,128],f16> torch.bind_symbolic_shape %6581, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %float0.000000e00_7746 = torch.constant.float 0.000000e+00 %false_7747 = torch.constant.bool false %none_7748 = torch.constant.none %6582:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%6579, %6580, %6581, %float0.000000e00_7746, %false_7747, %320, %none_7748) : (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?,128],f16>, !torch.float, !torch.bool, !torch.vtensor<[4,1,?,?],f16>, !torch.none) -> (!torch.vtensor<[4,32,?,128],f16>, !torch.vtensor<[4,32,?],f32>) torch.bind_symbolic_shape %6582#0, [%292], affine_map<()[s0] -> (4, 32, s0 * 32, 128)> : !torch.vtensor<[4,32,?,128],f16> %int1_7749 = torch.constant.int 1 %int2_7750 = torch.constant.int 2 %6583 = torch.aten.transpose.int %6582#0, %int1_7749, %int2_7750 : !torch.vtensor<[4,32,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,32,128],f16> torch.bind_symbolic_shape %6583, [%292], affine_map<()[s0] -> (4, s0 * 32, 32, 128)> : !torch.vtensor<[4,?,32,128],f16> %int4_7751 = torch.constant.int 4 %int4096_7752 = torch.constant.int 4096 %6584 = torch.prim.ListConstruct %int4_7751, %6481, %int4096_7752 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6585 = torch.aten.view %6583, %6584 : !torch.vtensor<[4,?,32,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6585, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7753 = torch.constant.int -2 %int-1_7754 = torch.constant.int -1 %6586 = torch.aten.transpose.int %284, %int-2_7753, %int-1_7754 : !torch.vtensor<[4096,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f16> %int4_7755 = torch.constant.int 4 %6587 = torch.aten.mul.int %int4_7755, %6481 : !torch.int, !torch.int -> !torch.int %int4096_7756 = torch.constant.int 4096 %6588 = torch.prim.ListConstruct %6587, %int4096_7756 : (!torch.int, !torch.int) -> !torch.list<int> %6589 = torch.aten.view %6585, %6588 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6589, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6590 = torch.aten.mm %6589, %6586 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6590, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7757 = torch.constant.int 4 %int4096_7758 = torch.constant.int 4096 %6591 = torch.prim.ListConstruct %int4_7757, %6481, %int4096_7758 : (!torch.int, 
!torch.int, !torch.int) -> !torch.list<int> %6592 = torch.aten.view %6590, %6591 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6592, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_7759 = torch.constant.int 1 %6593 = torch.aten.add.Tensor %6430, %6592, %int1_7759 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6593, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_7760 = torch.constant.int 6 %6594 = torch.prims.convert_element_type %6593, %int6_7760 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6594, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_7761 = torch.constant.int 2 %6595 = torch.aten.pow.Tensor_Scalar %6594, %int2_7761 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6595, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_7762 = torch.constant.int -1 %6596 = torch.prim.ListConstruct %int-1_7762 : (!torch.int) -> !torch.list<int> %true_7763 = torch.constant.bool true %none_7764 = torch.constant.none %6597 = torch.aten.mean.dim %6595, %6596, %true_7763, %none_7764 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6597, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_7765 = torch.constant.float 9.9999997473787516E-6 %int1_7766 = torch.constant.int 1 %6598 = torch.aten.add.Scalar %6597, %float9.999990e-06_7765, %int1_7766 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6598, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6599 = torch.aten.rsqrt %6598 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6599, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6600 = torch.aten.mul.Tensor %6594, %6599 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6600, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %6601 = torch.aten.mul.Tensor %285, %6600 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6601, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_7767 = torch.constant.int 5 %6602 = torch.prims.convert_element_type %6601, %int5_7767 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6602, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7768 = torch.constant.int -2 %int-1_7769 = torch.constant.int -1 %6603 = torch.aten.transpose.int %286, %int-2_7768, %int-1_7769 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_7770 = torch.constant.int 4 %6604 = torch.aten.mul.int %int4_7770, %294 : !torch.int, !torch.int -> !torch.int %int4096_7771 = torch.constant.int 4096 %6605 = torch.prim.ListConstruct %6604, %int4096_7771 : (!torch.int, !torch.int) -> !torch.list<int> %6606 = torch.aten.view %6602, %6605 : 
!torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6606, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6607 = torch.aten.mm %6606, %6603 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6607, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_7772 = torch.constant.int 4 %int14336_7773 = torch.constant.int 14336 %6608 = torch.prim.ListConstruct %int4_7772, %294, %int14336_7773 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6609 = torch.aten.view %6607, %6608 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6609, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %6610 = torch.aten.silu %6609 : !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6610, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_7774 = torch.constant.int -2 %int-1_7775 = torch.constant.int -1 %6611 = torch.aten.transpose.int %287, %int-2_7774, %int-1_7775 : !torch.vtensor<[14336,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f16> %int4_7776 = torch.constant.int 4 %6612 = torch.aten.mul.int %int4_7776, %294 : !torch.int, !torch.int -> !torch.int %int4096_7777 = torch.constant.int 4096 %6613 = torch.prim.ListConstruct %6612, %int4096_7777 : (!torch.int, !torch.int) -> !torch.list<int> %6614 = torch.aten.view %6602, %6613 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6614, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6615 = torch.aten.mm %6614, %6611 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,14336],f16> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6615, [%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %int4_7778 = torch.constant.int 4 %int14336_7779 = torch.constant.int 14336 %6616 = torch.prim.ListConstruct %int4_7778, %294, %int14336_7779 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6617 = torch.aten.view %6615, %6616 : !torch.vtensor<[?,14336],f16>, !torch.list<int> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6617, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %6618 = torch.aten.mul.Tensor %6610, %6617 : !torch.vtensor<[4,?,14336],f16>, !torch.vtensor<[4,?,14336],f16> -> !torch.vtensor<[4,?,14336],f16> torch.bind_symbolic_shape %6618, [%292], affine_map<()[s0] -> (4, s0 * 32, 14336)> : !torch.vtensor<[4,?,14336],f16> %int-2_7780 = torch.constant.int -2 %int-1_7781 = torch.constant.int -1 %6619 = torch.aten.transpose.int %288, %int-2_7780, %int-1_7781 : !torch.vtensor<[4096,14336],f16>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f16> %int1_7782 = torch.constant.int 1 %6620 = torch.aten.size.int %6609, %int1_7782 : !torch.vtensor<[4,?,14336],f16>, !torch.int -> !torch.int %int4_7783 = torch.constant.int 4 %6621 = torch.aten.mul.int %int4_7783, %6620 : !torch.int, !torch.int -> !torch.int %int14336_7784 = torch.constant.int 14336 %6622 = torch.prim.ListConstruct %6621, %int14336_7784 : (!torch.int, !torch.int) -> !torch.list<int> %6623 = torch.aten.view %6618, %6622 : !torch.vtensor<[4,?,14336],f16>, !torch.list<int> -> !torch.vtensor<[?,14336],f16> torch.bind_symbolic_shape %6623, 
[%292], affine_map<()[s0] -> (s0 * 128, 14336)> : !torch.vtensor<[?,14336],f16> %6624 = torch.aten.mm %6623, %6619 : !torch.vtensor<[?,14336],f16>, !torch.vtensor<[14336,4096],f16> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6624, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %int4_7785 = torch.constant.int 4 %int4096_7786 = torch.constant.int 4096 %6625 = torch.prim.ListConstruct %int4_7785, %6620, %int4096_7786 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6626 = torch.aten.view %6624, %6625 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6626, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int1_7787 = torch.constant.int 1 %6627 = torch.aten.add.Tensor %6593, %6626, %int1_7787 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6627, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int6_7788 = torch.constant.int 6 %6628 = torch.prims.convert_element_type %6627, %int6_7788 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6628, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int2_7789 = torch.constant.int 2 %6629 = torch.aten.pow.Tensor_Scalar %6628, %int2_7789 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6629, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int-1_7790 = torch.constant.int -1 %6630 = torch.prim.ListConstruct %int-1_7790 : (!torch.int) -> !torch.list<int> %true_7791 = torch.constant.bool true %none_7792 = torch.constant.none %6631 = torch.aten.mean.dim %6629, %6630, %true_7791, %none_7792 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6631, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %float9.999990e-06_7793 = torch.constant.float 9.9999997473787516E-6 %int1_7794 = torch.constant.int 1 %6632 = torch.aten.add.Scalar %6631, %float9.999990e-06_7793, %int1_7794 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6632, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6633 = torch.aten.rsqrt %6632 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32> torch.bind_symbolic_shape %6633, [%292], affine_map<()[s0] -> (4, s0 * 32, 1)> : !torch.vtensor<[4,?,1],f32> %6634 = torch.aten.mul.Tensor %6628, %6633 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6634, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %6635 = torch.aten.mul.Tensor %289, %6634 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32> torch.bind_symbolic_shape %6635, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f32> %int5_7795 = torch.constant.int 5 %6636 = torch.prims.convert_element_type %6635, %int5_7795 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16> torch.bind_symbolic_shape %6636, [%292], affine_map<()[s0] -> (4, s0 * 32, 4096)> : !torch.vtensor<[4,?,4096],f16> %int-2_7796 = torch.constant.int -2 %int-1_7797 = torch.constant.int -1 
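// Epilogue: after the attention output projection, residual add, and the
// SwiGLU FFN (silu(gate) * up -> down, 4096 <-> 14336), the final RMSNorm
// above uses eps = 1e-5 (printed as its f32 rounding, 9.9999997473787516E-6).
// The ops below transpose the [128256, 4096] output-head weight %290, flatten
// the hidden states to [batch*seq, 4096], and matmul to one logit per entry
// of the 128256-token vocabulary, matching the token_embd table.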
%6637 = torch.aten.transpose.int %290, %int-2_7796, %int-1_7797 : !torch.vtensor<[128256,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[4096,128256],f16> %int4_7798 = torch.constant.int 4 %6638 = torch.aten.mul.int %int4_7798, %294 : !torch.int, !torch.int -> !torch.int %int4096_7799 = torch.constant.int 4096 %6639 = torch.prim.ListConstruct %6638, %int4096_7799 : (!torch.int, !torch.int) -> !torch.list<int> %6640 = torch.aten.view %6636, %6639 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16> torch.bind_symbolic_shape %6640, [%292], affine_map<()[s0] -> (s0 * 128, 4096)> : !torch.vtensor<[?,4096],f16> %6641 = torch.aten.mm %6640, %6637 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128256],f16> -> !torch.vtensor<[?,128256],f16> torch.bind_symbolic_shape %6641, [%292], affine_map<()[s0] -> (s0 * 128, 128256)> : !torch.vtensor<[?,128256],f16> %int4_7800 = torch.constant.int 4 %int128256 = torch.constant.int 128256 %6642 = torch.prim.ListConstruct %int4_7800, %294, %int128256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> %6643 = torch.aten.view %6641, %6642 : !torch.vtensor<[?,128256],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128256],f16> torch.bind_symbolic_shape %6643, [%292], affine_map<()[s0] -> (4, s0 * 32, 128256)> : !torch.vtensor<[4,?,128256],f16> return %6643 : !torch.vtensor<[4,?,128256],f16> } }
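// End of module: the function returns the f16 logits [4, seq, 128256]; the
// updated paged KV cache was written back through %arg3 as a side effect.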